hgext/highlight/highlight.py
author Kevin Bullock <kbullock+mercurial@ringworld.org>
Wed, 01 Feb 2017 10:15:10 -0600
branchstable
changeset 30861 e1526da1e6d8
parent 29485 6a98f9408a50
child 32908 661025fd3e1c
permissions -rw-r--r--
merge with i18n
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
8251
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     1
# highlight.py - highlight extension implementation file
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     2
#
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     3
#  Copyright 2007-2009 Adam Hupp <adam@hupp.org> and others
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     4
#
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     5
# This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 9424
diff changeset
     6
# GNU General Public License version 2 or any later version.
6938
ce94b3236ea4 highlight: split code to improve startup times
Patrick Mezard <pmezard@gmail.com>
parents: 6666
diff changeset
     7
#
ce94b3236ea4 highlight: split code to improve startup times
Patrick Mezard <pmezard@gmail.com>
parents: 6666
diff changeset
     8
# The original module was split in an interface and an implementation
ce94b3236ea4 highlight: split code to improve startup times
Patrick Mezard <pmezard@gmail.com>
parents: 6666
diff changeset
     9
# file to defer pygments loading and speedup extension setup.
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    10
29485
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    11
from __future__ import absolute_import
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    12
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    13
import pygments
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    14
import pygments.formatters
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    15
import pygments.lexers
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    16
import pygments.util
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    17
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    18
from mercurial import demandimport
10394
4612cded5176 fix coding style (reported by pylint)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 10263
diff changeset
    19
demandimport.ignore.extend(['pkgutil', 'pkg_resources', '__main__'])
29485
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    20
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    21
from mercurial import (
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    22
    encoding,
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    23
    util,
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    24
)
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    25
29485
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    26
highlight = pygments.highlight
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    27
ClassNotFound = pygments.util.ClassNotFound
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    28
guess_lexer = pygments.lexers.guess_lexer
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    29
guess_lexer_for_filename = pygments.lexers.guess_lexer_for_filename
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    30
TextLexer = pygments.lexers.TextLexer
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    31
HtmlFormatter = pygments.formatters.HtmlFormatter
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    32
6485
938319418d8c highlight: Generate pygments style sheet dynamically
Isaac Jurado <diptongo@gmail.com>
parents: 6394
diff changeset
    33
SYNTAX_CSS = ('\n<link rel="stylesheet" href="{url}highlightcss" '
5533
6cf7d7fe7d3d highlight: clean up coding style a little
Bryan O'Sullivan <bos@serpentine.com>
parents: 5532
diff changeset
    34
              'type="text/css" />')
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    35
26680
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    36
def pygmentize(field, fctx, style, tmpl, guessfilenameonly=False):
6394
55bc0a035e1f highlight: some small cleanups
Dirkjan Ochtman <dirkjan@ochtman.nl>
parents: 6393
diff changeset
    37
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    38
    # append a <link ...> to the syntax highlighting css
10959
d1f4657f55e4 highlight: fix to work with caching templater
Matt Mackall <mpm@selenic.com>
parents: 10394
diff changeset
    39
    old_header = tmpl.load('header')
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    40
    if SYNTAX_CSS not in old_header:
27637
b502138f5faa cleanup: remove superfluous space after space after equals (python)
timeless <timeless@mozdev.org>
parents: 26680
diff changeset
    41
        new_header = old_header + SYNTAX_CSS
5616
88ca3e0fb6e5 highlight: adapt to hgweb_mode refactoring
Christian Ebert <blacktrash@gmx.net>
parents: 5533
diff changeset
    42
        tmpl.cache['header'] = new_header
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    43
6194
fe54e7501de1 highlight: bail out if file is binary
Brendan Cully <brendan@kublai.com>
parents: 6193
diff changeset
    44
    text = fctx.data()
fe54e7501de1 highlight: bail out if file is binary
Brendan Cully <brendan@kublai.com>
parents: 6193
diff changeset
    45
    if util.binary(text):
fe54e7501de1 highlight: bail out if file is binary
Brendan Cully <brendan@kublai.com>
parents: 6193
diff changeset
    46
        return
fe54e7501de1 highlight: bail out if file is binary
Brendan Cully <brendan@kublai.com>
parents: 6193
diff changeset
    47
23613
7b8ff3fd11d3 highlight: ignore Unicode's extra linebreaks (issue4291)
Matt Mackall <mpm@selenic.com>
parents: 19169
diff changeset
    48
    # str.splitlines() != unicode.splitlines() because "reasons"
7b8ff3fd11d3 highlight: ignore Unicode's extra linebreaks (issue4291)
Matt Mackall <mpm@selenic.com>
parents: 19169
diff changeset
    49
    for c in "\x0c\x1c\x1d\x1e":
7b8ff3fd11d3 highlight: ignore Unicode's extra linebreaks (issue4291)
Matt Mackall <mpm@selenic.com>
parents: 19169
diff changeset
    50
        if c in text:
7b8ff3fd11d3 highlight: ignore Unicode's extra linebreaks (issue4291)
Matt Mackall <mpm@selenic.com>
parents: 19169
diff changeset
    51
            text = text.replace(c, '')
7b8ff3fd11d3 highlight: ignore Unicode's extra linebreaks (issue4291)
Matt Mackall <mpm@selenic.com>
parents: 19169
diff changeset
    52
9424
799373ff2554 highlight: fixes garbled text in non-UTF-8 environment
Yuya Nishihara <yuya@tcha.org>
parents: 8360
diff changeset
    53
    # Pygments is best used with Unicode strings:
799373ff2554 highlight: fixes garbled text in non-UTF-8 environment
Yuya Nishihara <yuya@tcha.org>
parents: 8360
diff changeset
    54
    # <http://pygments.org/docs/unicode/>
799373ff2554 highlight: fixes garbled text in non-UTF-8 environment
Yuya Nishihara <yuya@tcha.org>
parents: 8360
diff changeset
    55
    text = text.decode(encoding.encoding, 'replace')
7120
db7557359636 highlight: convert text to local before passing to pygmentize (issue1341)
Christian Ebert <blacktrash@gmx.net>
parents: 6938
diff changeset
    56
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    57
    # To get multi-line strings right, we can't format line-by-line
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    58
    try:
19169
bcdfb6078b9f highlight: fix page layout with empty first and last lines
Alexander Plavin <me@aplavin.ru>
parents: 18054
diff changeset
    59
        lexer = guess_lexer_for_filename(fctx.path(), text[:1024],
bcdfb6078b9f highlight: fix page layout with empty first and last lines
Alexander Plavin <me@aplavin.ru>
parents: 18054
diff changeset
    60
                                         stripnl=False)
6494
c30849d4c8ba highlight: backward compatibility with pygments 0.5.1
Benoit Allard <benoit@aeteurope.nl>
parents: 6212
diff changeset
    61
    except (ClassNotFound, ValueError):
26680
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    62
        # guess_lexer will return a lexer if *any* lexer matches. There is
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    63
        # no way to specify a minimum match score. This can give a high rate of
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    64
        # false positives on files with an unknown filename pattern.
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    65
        if guessfilenameonly:
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    66
            return
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    67
6198
358cc9cf54db highlight: guess by text when path name is ambiguous
Brendan Cully <brendan@kublai.com>
parents: 6197
diff changeset
    68
        try:
19169
bcdfb6078b9f highlight: fix page layout with empty first and last lines
Alexander Plavin <me@aplavin.ru>
parents: 18054
diff changeset
    69
            lexer = guess_lexer(text[:1024], stripnl=False)
6494
c30849d4c8ba highlight: backward compatibility with pygments 0.5.1
Benoit Allard <benoit@aeteurope.nl>
parents: 6212
diff changeset
    70
        except (ClassNotFound, ValueError):
25899
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    71
            # Don't highlight unknown files
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    72
            return
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    73
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    74
    # Don't highlight text files
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    75
    if isinstance(lexer, TextLexer):
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    76
        return
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    77
25867
a74e9806d17d highlight: produce correct markup when there's a blank line just before EOF
Anton Shestakov <av6@dwimlabs.net>
parents: 23613
diff changeset
    78
    formatter = HtmlFormatter(nowrap=True, style=style)
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    79
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    80
    colorized = highlight(text, lexer, formatter)
9424
799373ff2554 highlight: fixes garbled text in non-UTF-8 environment
Yuya Nishihara <yuya@tcha.org>
parents: 8360
diff changeset
    81
    coloriter = (s.encode(encoding.encoding, 'replace')
799373ff2554 highlight: fixes garbled text in non-UTF-8 environment
Yuya Nishihara <yuya@tcha.org>
parents: 8360
diff changeset
    82
                 for s in colorized.splitlines())
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    83
29216
ead25aa27a43 py3: convert to next() function
timeless <timeless@mozdev.org>
parents: 27637
diff changeset
    84
    tmpl.filters['colorize'] = lambda x: next(coloriter)
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    85
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    86
    oldl = tmpl.cache[field]
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    87
    newl = oldl.replace('line|escape', 'line|colorize')
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    88
    tmpl.cache[field] = newl