Patchwork D8023: chg: read CHGORIG_ values from env and handle these appropriately

login
register
mail settings
Submitter phabricator
Date Feb. 4, 2020, 9:55 p.m.
Message ID <a3261be6fbb73e39a3c68810ac792c40@localhost.localdomain>
Download mbox | patch
Permalink /patch/44922/
State Not Applicable
Headers show

Comments

phabricator - Feb. 4, 2020, 9:55 p.m.
spectral updated this revision to Diff 19875.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D8023?vs=19652&id=19875

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8023/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8023

AFFECTED FILES
  mercurial/chgserver.py
  tests/test-chg.t

CHANGE DETAILS




To: spectral, #hg-reviewers
Cc: mjpieters, mercurial-devel

Patch

diff --git a/tests/test-chg.t b/tests/test-chg.t
--- a/tests/test-chg.t
+++ b/tests/test-chg.t
@@ -332,8 +332,7 @@ 
   YYYY/MM/DD HH:MM:SS (PID)> log -R cached
   YYYY/MM/DD HH:MM:SS (PID)> loaded repo into cache: $TESTTMP/cached (in  ...s)
 
-Test that chg works even when python "coerces" the locale (py3.7+, which is done
-by default if none of LC_ALL, LC_CTYPE, or LANG are set in the environment)
+Test that chg works even when python "coerces" the locale (py3.7+)
 
   $ cat > $TESTTMP/debugenv.py <<EOF
   > from mercurial import encoding
@@ -345,11 +344,31 @@ 
   >     for k in [b'LC_ALL', b'LC_CTYPE', b'LANG']:
   >         v = encoding.environ.get(k)
   >         if v is not None:
-  >             ui.write(b'%s=%s\n' % (k, encoding.environ[k]))
+  >             ui.write(b'debugenv: %s=%s\n' % (k, encoding.environ[k]))
   > EOF
-  $ LANG= LC_ALL= LC_CTYPE= chg \
-  >    --config extensions.debugenv=$TESTTMP/debugenv.py debugenv
-  LC_ALL=
-  LC_CTYPE=C.UTF-8 (py37 !)
-  LC_CTYPE= (no-py37 !)
-  LANG=
+  $ CHGDEBUG= LANG= LC_ALL= LC_CTYPE= chg \
+  >    --config extensions.debugenv=$TESTTMP/debugenv.py debugenv 2>&1 \
+  >    | egrep 'debugenv|start'
+  chg: debug: * start cmdserver at * (glob)
+  debugenv: LC_ALL=
+  debugenv: LC_CTYPE=C.UTF-8 (py37 !)
+  debugenv: LC_CTYPE= (no-py37 !)
+  debugenv: LANG=
+(Should not trigger a command server restart)
+  $ CHGDEBUG= LANG= LC_ALL= LC_CTYPE= chg \
+  >    --config extensions.debugenv=$TESTTMP/debugenv.py debugenv 2>&1 \
+  >    | egrep 'debugenv|start'
+  debugenv: LC_ALL=
+  debugenv: LC_CTYPE=C.UTF-8 (py37 !)
+  debugenv: LC_CTYPE= (no-py37 !)
+  debugenv: LANG=
+(Should trigger a command server restart since the LC_CTYPE=C value should be
+used, not the coerced one)
+  $ CHGDEBUG= LANG= LC_ALL= LC_CTYPE=C chg \
+  >    --config extensions.debugenv=$TESTTMP/debugenv.py debugenv 2>&1 \
+  >    | egrep 'debugenv|start'
+  chg: debug: * start cmdserver at * (glob)
+  debugenv: LC_ALL=
+  debugenv: LC_CTYPE=C.UTF-8 (py37 !)
+  debugenv: LC_CTYPE=C (no-py37 !)
+  debugenv: LANG=
diff --git a/mercurial/chgserver.py b/mercurial/chgserver.py
--- a/mercurial/chgserver.py
+++ b/mercurial/chgserver.py
@@ -129,12 +129,37 @@ 
         ignored = {b'HG'}
     else:
         ignored = set()
-    envitems = [
-        (k, v)
+    envitems = {
+        k: v
         for k, v in pycompat.iteritems(encoding.environ)
         if _envre.match(k) and k not in ignored
-    ]
-    envhash = _hashlist(sorted(envitems))
+    }
+    # Py3 may modify environment variables before `hg serve` starts. When chg
+    # connects to us, it passes in its environment (without the modifications),
+    # and we'll generate a different config hash.
+    #
+    # To handle this situation, chg will pass the variables through using
+    # CHGORIG_<envname>=<0 if not in env, 1 if in env><value, if 1>, and we use
+    # *that* value only when calculating the hash. Examples:
+    #   CHGORIG_LC_CTYPE=0   <-- not in chg's env, delete for hash
+    #   CHGORIG_LC_CTYPE=1   <-- in chg's env, but empty. Set to empty for hash.
+    #   CHGORIG_LC_CTYPE=1UTF-8  <-- set to UTF-8 for hash.
+    #
+    # This does NOT cause us to lose Py3's modifications to these variables (we
+    # aren't copying this into our environment), so chg and hg should continue
+    # to behave the same - both will operate with the modifications Py3 made.
+    for k, v in pycompat.iteritems(encoding.environ):
+        if k.startswith(b'CHGORIG_'):
+            origname = k[len(b'CHGORIG_') :]
+            if not _envre.match(origname) or origname in ignored:
+                continue
+
+            if v.startswith(b'0') and origname in envitems:
+                del envitems[origname]
+            else:
+                envitems[origname] = v[1:]
+
+    envhash = _hashlist(sorted(pycompat.iteritems(envitems)))
     return sectionhash[:6] + envhash[:6]
 
 
@@ -550,39 +575,18 @@ 
             raise ValueError(b'unexpected value in setenv request')
         self.ui.log(b'chgserver', b'setenv: %r\n', sorted(newenv.keys()))
 
-        # Python3 has some logic to "coerce" the C locale to a UTF-8 capable
-        # one, and it sets LC_CTYPE in the environment to C.UTF-8 if none of
-        # 'LC_CTYPE', 'LC_ALL' or 'LANG' are set (to any value). This can be
-        # disabled with PYTHONCOERCECLOCALE=0 in the environment.
-        #
-        # When fromui is called via _inithashstate, python has already set
-        # this, so that's in the environment right when we start up the hg
-        # process. Then chg will call us and tell us to set the environment to
-        # the one it has; this might NOT have LC_CTYPE, so we'll need to
-        # carry-forward the LC_CTYPE that was coerced in these situations.
-        #
-        # If this is not handled, we will fail config+env validation and fail
-        # to start chg. If this is just ignored instead of carried forward, we
-        # may have different behavior between chg and non-chg.
-        if pycompat.ispy3:
-            # Rename for wordwrapping purposes
-            oldenv = encoding.environ
-            if not any(
-                e.get(b'PYTHONCOERCECLOCALE') == b'0' for e in [oldenv, newenv]
-            ):
-                keys = [b'LC_CTYPE', b'LC_ALL', b'LANG']
-                old_keys = [k for k, v in oldenv.items() if k in keys and v]
-                new_keys = [k for k, v in newenv.items() if k in keys and v]
-                # If the user's environment (from chg) doesn't have ANY of the
-                # keys that python looks for, and the environment (from
-                # initialization) has ONLY LC_CTYPE and it's set to C.UTF-8,
-                # carry it forward.
-                if (
-                    not new_keys
-                    and old_keys == [b'LC_CTYPE']
-                    and oldenv[b'LC_CTYPE'] == b'C.UTF-8'
-                ):
-                    newenv[b'LC_CTYPE'] = oldenv[b'LC_CTYPE']
+        # We want to *preserve* these variables in the environment across the
+        # clear, since Python may have modified them during startup, and we want
+        # to preserve that modification.
+        preserve = [k for k in encoding.environ if k.startswith(b'CHGORIG_')]
+        for chgorig in preserve:
+            actual = chgorig[8:]
+            if actual in newenv:
+                newenv[chgorig] = b'1' + newenv[actual]
+            else:
+                newenv[chgorig] = b'0'
+            # Keep the value as it was coerced by python
+            newenv[actual] = encoding.environ[actual]
 
         encoding.environ.clear()
         encoding.environ.update(newenv)