hgext/highlight/highlight.py
author Joerg Sonnenberger <joerg@bec.de>
Fri, 30 Apr 2021 02:11:58 +0200
changeset 47043 12450fbea288
parent 43597 856cce0c255c
child 48875 6000f5b25c9b
permissions -rw-r--r--
manifests: push down expected node length into the parser This strictly enforces the node length in the manifest lines according to what the repository expects. One test case moves large hash testing into the non-treemanifest part as treemanifests don't provide an interface for overriding just the node length for now. Differential Revision: https://phab.mercurial-scm.org/D10533
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
8251
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     1
# highlight.py - highlight extension implementation file
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     2
#
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     3
#  Copyright 2007-2009 Adam Hupp <adam@hupp.org> and others
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     4
#
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     5
# This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 9424
diff changeset
     6
# GNU General Public License version 2 or any later version.
6938
ce94b3236ea4 highlight: split code to improve startup times
Patrick Mezard <pmezard@gmail.com>
parents: 6666
diff changeset
     7
#
ce94b3236ea4 highlight: split code to improve startup times
Patrick Mezard <pmezard@gmail.com>
parents: 6666
diff changeset
     8
# The original module was split in an interface and an implementation
ce94b3236ea4 highlight: split code to improve startup times
Patrick Mezard <pmezard@gmail.com>
parents: 6666
diff changeset
     9
# file to defer pygments loading and speedup extension setup.
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    10
29485
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    11
from __future__ import absolute_import
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    12
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    13
from mercurial import demandimport
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 42923
diff changeset
    14
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    15
demandimport.IGNORES.update([b'pkgutil', b'pkg_resources', b'__main__'])
29485
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    16
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    17
from mercurial import (
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    18
    encoding,
42923
a7abc6081bc5 highlight: fix encoding issues to enable Py3 compatibility
Connor Sheehan <sheehan@mozilla.com>
parents: 38383
diff changeset
    19
    pycompat,
37084
f0b6fbea00cf stringutil: bulk-replace call sites to point to new module
Yuya Nishihara <yuya@tcha.org>
parents: 35329
diff changeset
    20
)
f0b6fbea00cf stringutil: bulk-replace call sites to point to new module
Yuya Nishihara <yuya@tcha.org>
parents: 35329
diff changeset
    21
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 42923
diff changeset
    22
from mercurial.utils import stringutil
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    23
32908
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    24
with demandimport.deactivated():
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    25
    import pygments
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    26
    import pygments.formatters
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    27
    import pygments.lexers
35329
169d66db5920 highlight: eagerly discover plugin lexers while demandimport is off
Augie Fackler <augie@google.com>
parents: 32908
diff changeset
    28
    import pygments.plugin
32908
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    29
    import pygments.util
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    30
35329
169d66db5920 highlight: eagerly discover plugin lexers while demandimport is off
Augie Fackler <augie@google.com>
parents: 32908
diff changeset
    31
    for unused in pygments.plugin.find_plugin_lexers():
169d66db5920 highlight: eagerly discover plugin lexers while demandimport is off
Augie Fackler <augie@google.com>
parents: 32908
diff changeset
    32
        pass
169d66db5920 highlight: eagerly discover plugin lexers while demandimport is off
Augie Fackler <augie@google.com>
parents: 32908
diff changeset
    33
29485
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    34
highlight = pygments.highlight
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    35
ClassNotFound = pygments.util.ClassNotFound
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    36
guess_lexer = pygments.lexers.guess_lexer
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    37
guess_lexer_for_filename = pygments.lexers.guess_lexer_for_filename
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    38
TextLexer = pygments.lexers.TextLexer
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    39
HtmlFormatter = pygments.formatters.HtmlFormatter
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    40
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 42923
diff changeset
    41
SYNTAX_CSS = (
43117
8ff1ecfadcd1 cleanup: join string literals that are already on one line
Martin von Zweigbergk <martinvonz@google.com>
parents: 43077
diff changeset
    42
    b'\n<link rel="stylesheet" href="{url}highlightcss" type="text/css" />'
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 42923
diff changeset
    43
)
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 42923
diff changeset
    44
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    45
26680
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    46
def pygmentize(field, fctx, style, tmpl, guessfilenameonly=False):
6394
55bc0a035e1f highlight: some small cleanups
Dirkjan Ochtman <dirkjan@ochtman.nl>
parents: 6393
diff changeset
    47
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    48
    # append a <link ...> to the syntax highlighting css
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    49
    tmpl.load(b'header')
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    50
    old_header = tmpl.cache[b'header']
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    51
    if SYNTAX_CSS not in old_header:
27637
b502138f5faa cleanup: remove superfluous space after space after equals (python)
timeless <timeless@mozdev.org>
parents: 26680
diff changeset
    52
        new_header = old_header + SYNTAX_CSS
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    53
        tmpl.cache[b'header'] = new_header
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    54
6194
fe54e7501de1 highlight: bail out if file is binary
Brendan Cully <brendan@kublai.com>
parents: 6193
diff changeset
    55
    text = fctx.data()
37084
f0b6fbea00cf stringutil: bulk-replace call sites to point to new module
Yuya Nishihara <yuya@tcha.org>
parents: 35329
diff changeset
    56
    if stringutil.binary(text):
6194
fe54e7501de1 highlight: bail out if file is binary
Brendan Cully <brendan@kublai.com>
parents: 6193
diff changeset
    57
        return
fe54e7501de1 highlight: bail out if file is binary
Brendan Cully <brendan@kublai.com>
parents: 6193
diff changeset
    58
23613
7b8ff3fd11d3 highlight: ignore Unicode's extra linebreaks (issue4291)
Matt Mackall <mpm@selenic.com>
parents: 19169
diff changeset
    59
    # str.splitlines() != unicode.splitlines() because "reasons"
43597
856cce0c255c py3: avoid iterating over a literal bytes in highlight
Denis Laxalde <denis.laxalde@logilab.fr>
parents: 43117
diff changeset
    60
    for c in b"\x0c", b"\x1c", b"\x1d", b"\x1e":
23613
7b8ff3fd11d3 highlight: ignore Unicode's extra linebreaks (issue4291)
Matt Mackall <mpm@selenic.com>
parents: 19169
diff changeset
    61
        if c in text:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    62
            text = text.replace(c, b'')
23613
7b8ff3fd11d3 highlight: ignore Unicode's extra linebreaks (issue4291)
Matt Mackall <mpm@selenic.com>
parents: 19169
diff changeset
    63
9424
799373ff2554 highlight: fixes garbled text in non-UTF-8 environment
Yuya Nishihara <yuya@tcha.org>
parents: 8360
diff changeset
    64
    # Pygments is best used with Unicode strings:
799373ff2554 highlight: fixes garbled text in non-UTF-8 environment
Yuya Nishihara <yuya@tcha.org>
parents: 8360
diff changeset
    65
    # <http://pygments.org/docs/unicode/>
42923
a7abc6081bc5 highlight: fix encoding issues to enable Py3 compatibility
Connor Sheehan <sheehan@mozilla.com>
parents: 38383
diff changeset
    66
    text = text.decode(pycompat.sysstr(encoding.encoding), 'replace')
7120
db7557359636 highlight: convert text to local before passing to pygmentize (issue1341)
Christian Ebert <blacktrash@gmx.net>
parents: 6938
diff changeset
    67
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    68
    # To get multi-line strings right, we can't format line-by-line
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    69
    try:
42923
a7abc6081bc5 highlight: fix encoding issues to enable Py3 compatibility
Connor Sheehan <sheehan@mozilla.com>
parents: 38383
diff changeset
    70
        path = pycompat.sysstr(fctx.path())
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 42923
diff changeset
    71
        lexer = guess_lexer_for_filename(path, text[:1024], stripnl=False)
6494
c30849d4c8ba highlight: backward compatibility with pygments 0.5.1
Benoit Allard <benoit@aeteurope.nl>
parents: 6212
diff changeset
    72
    except (ClassNotFound, ValueError):
26680
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    73
        # guess_lexer will return a lexer if *any* lexer matches. There is
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    74
        # no way to specify a minimum match score. This can give a high rate of
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    75
        # false positives on files with an unknown filename pattern.
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    76
        if guessfilenameonly:
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    77
            return
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    78
6198
358cc9cf54db highlight: guess by text when path name is ambiguous
Brendan Cully <brendan@kublai.com>
parents: 6197
diff changeset
    79
        try:
19169
bcdfb6078b9f highlight: fix page layout with empty first and last lines
Alexander Plavin <me@aplavin.ru>
parents: 18054
diff changeset
    80
            lexer = guess_lexer(text[:1024], stripnl=False)
6494
c30849d4c8ba highlight: backward compatibility with pygments 0.5.1
Benoit Allard <benoit@aeteurope.nl>
parents: 6212
diff changeset
    81
        except (ClassNotFound, ValueError):
25899
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    82
            # Don't highlight unknown files
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    83
            return
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    84
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    85
    # Don't highlight text files
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    86
    if isinstance(lexer, TextLexer):
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    87
        return
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    88
42923
a7abc6081bc5 highlight: fix encoding issues to enable Py3 compatibility
Connor Sheehan <sheehan@mozilla.com>
parents: 38383
diff changeset
    89
    formatter = HtmlFormatter(nowrap=True, style=pycompat.sysstr(style))
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    90
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    91
    colorized = highlight(text, lexer, formatter)
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 42923
diff changeset
    92
    coloriter = (
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 42923
diff changeset
    93
        s.encode(pycompat.sysstr(encoding.encoding), 'replace')
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 42923
diff changeset
    94
        for s in colorized.splitlines()
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 42923
diff changeset
    95
    )
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    96
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    97
    tmpl._filters[b'colorize'] = lambda x: next(coloriter)
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    98
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    99
    oldl = tmpl.cache[field]
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   100
    newl = oldl.replace(b'line|escape', b'line|colorize')
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
   101
    tmpl.cache[field] = newl