Patchwork [6,of,8] py3: use unicode literals and unichr in revset.py

login
register
mail settings
Submitter Pulkit Goyal
Date Aug. 7, 2016, 7:54 p.m.
Message ID <da4a0ba184d3eff2819d.1470599679@waste.org>
Download mbox | patch
Permalink /patch/16185/
State Not Applicable
Headers show

Comments

Pulkit Goyal - Aug. 7, 2016, 7:54 p.m.
# HG changeset patch
# User Pulkit Goyal <7895pulkit@gmail.com>
# Date 1470168209 -19800
#      Wed Aug 03 01:33:29 2016 +0530
# Node ID da4a0ba184d3eff2819d73884770d342edce88c1
# Parent  4547ab529d26196dc40909693b5e9673763e9058
py3: use unicode literals and unichr in revset.py

The assignments of _syminitletters, _symletters, and _aliassyminitletters
didn't work under Python 3 because they mixed string types. We rewrite the
code to work under both Python 2 and Python 3 by using unichr (aliased to
chr on Python 3) and unicode literals.

We preserve the final type of elements in the sets as bytes by encoding
each character as Latin-1.

Patch

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -9,6 +9,7 @@ 
 
 import heapq
 import re
+import sys
 
 from .i18n import _
 from . import (
@@ -27,6 +28,9 @@ 
     util,
 )
 
+if sys.version_info[0]>=3:
+    unichr = chr
+
 def _revancestors(repo, revs, followfirst):
     """Like revlog.ancestors(), but supports followfirst."""
     if followfirst:
@@ -175,12 +179,12 @@ 
 keywords = set(['and', 'or', 'not'])
 
 # default set of valid characters for the initial letter of symbols
-_syminitletters = set(c for c in [chr(i) for i in xrange(256)]
-                      if c.isalnum() or c in '._@' or ord(c) > 127)
+_syminitletters = set(c.encode("latin1") for c in [unichr(i) for i in xrange(256)]
+                      if c.isalnum() or c in u'._@' or ord(c) > 127)
 
 # default set of valid characters for non-initial letters of symbols
-_symletters = set(c for c in  [chr(i) for i in xrange(256)]
-                  if c.isalnum() or c in '-._/@' or ord(c) > 127)
+_symletters = set(c.encode("latin-1") for c in  [unichr(i) for i in xrange(256)]
+                  if c.isalnum() or c in u'-._/@' or ord(c) > 127)
 
 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
     '''
@@ -2450,8 +2454,8 @@ 
 
 # the set of valid characters for the initial letter of symbols in
 # alias declarations and definitions
-_aliassyminitletters = set(c for c in [chr(i) for i in xrange(256)]
-                           if c.isalnum() or c in '._@$' or ord(c) > 127)
+_aliassyminitletters = set(c.encode("latin-1") for c in [unichr(i) for i in xrange(256)]
+                           if c.isalnum() or c in u'._@$' or ord(c) > 127)
 
 def _parsewith(spec, lookup=None, syminitletters=None):
     """Generate a parse tree of given spec with given tokenizing options