From patchwork Mon Sep 9 19:22:33 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: D6832: highlight: fix encoding issues to enable Py3 compatibility From: phabricator X-Patchwork-Id: 41596 Message-Id: To: Phabricator Cc: mercurial-devel@mercurial-scm.org Date: Mon, 9 Sep 2019 19:22:33 +0000 sheehan created this revision. Herald added a reviewer: pulkit. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY This commit fixes various encoding issues with the `highlight` extension to enable compatibility with Python 3. Python `.encode()` and `.decode()` requires the target encoding to be passed as a `str`, so the value of `mercurial.encoding.encoding` must be converted before passing to the function. Pygments also assumes the `str` type for values it works with, so we must perform conversions before and after receiving values from its APIs. After applying this patch, `test-highlight.t` passes under Python 3. We add it to `python3-whitelist` as well. Tested with Pygments 2.4.2. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D6832 AFFECTED FILES contrib/python3-whitelist hgext/highlight/__init__.py hgext/highlight/highlight.py CHANGE DETAILS To: sheehan, pulkit, #hg-reviewers Cc: mercurial-devel diff --git a/hgext/highlight/highlight.py b/hgext/highlight/highlight.py --- a/hgext/highlight/highlight.py +++ b/hgext/highlight/highlight.py @@ -15,6 +15,7 @@ from mercurial import ( encoding, + pycompat, ) from mercurial.utils import ( @@ -61,11 +62,12 @@ # Pygments is best used with Unicode strings: # - text = text.decode(encoding.encoding, 'replace') + text = text.decode(pycompat.sysstr(encoding.encoding), 'replace') # To get multi-line strings right, we can't format line-by-line try: - lexer = guess_lexer_for_filename(fctx.path(), text[:1024], + path = pycompat.sysstr(fctx.path()) + lexer = guess_lexer_for_filename(path, text[:1024], stripnl=False) except (ClassNotFound, ValueError): # guess_lexer will return a lexer if *any* lexer matches. There is @@ -84,10 +86,10 @@ if isinstance(lexer, TextLexer): return - formatter = HtmlFormatter(nowrap=True, style=style) + formatter = HtmlFormatter(nowrap=True, style=pycompat.sysstr(style)) colorized = highlight(text, lexer, formatter) - coloriter = (s.encode(encoding.encoding, 'replace') + coloriter = (s.encode(pycompat.sysstr(encoding.encoding), 'replace') for s in colorized.splitlines()) tmpl._filters['colorize'] = lambda x: next(coloriter) diff --git a/hgext/highlight/__init__.py b/hgext/highlight/__init__.py --- a/hgext/highlight/__init__.py +++ b/hgext/highlight/__init__.py @@ -36,6 +36,7 @@ from mercurial import ( extensions, + pycompat, ) # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for @@ -79,11 +80,12 @@ def generate_css(web): pg_style = web.config('web', 'pygments_style', 'colorful') - fmter = highlight.HtmlFormatter(style=pg_style) + fmter = highlight.HtmlFormatter(style=pycompat.sysstr(pg_style)) web.res.headers['Content-Type'] = 'text/css' + style_defs = fmter.get_style_defs(pycompat.sysstr('')) web.res.setbodybytes(''.join([ '/* pygments_style = %s */\n\n' % pg_style, - fmter.get_style_defs(''), + pycompat.bytestr(style_defs), ])) return web.res.sendresponse() diff --git a/contrib/python3-whitelist b/contrib/python3-whitelist --- a/contrib/python3-whitelist +++ b/contrib/python3-whitelist @@ -296,6 +296,7 @@ test-hgwebdir-paths.py test-hgwebdir.t test-hgwebdirsym.t +test-highlight.t test-histedit-arguments.t test-histedit-base.t test-histedit-bookmark-motion.t