Patchwork D8039: chg: force-set LC_CTYPE on server start to actual value from the environment

login
register
mail settings
Submitter phabricator
Date Feb. 6, 2020, 2:32 p.m.
Message ID <915b387366266a932d5962c733a2f529@localhost.localdomain>
Download mbox | patch
Permalink /patch/44974/
State Not Applicable
Headers show

Comments

phabricator - Feb. 6, 2020, 2:32 p.m.
Closed by commit rHG04a3ae7aba14: chg: force-set LC_CTYPE on server start to actual value from the environment (authored by spectral).
This revision was automatically updated to reflect the committed changes.
This revision was not accepted when it landed; it landed in state "Needs Review".

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D8039?vs=19876&id=19933

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8039/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8039

AFFECTED FILES
  contrib/chg/chg.c
  mercurial/chgserver.py
  tests/test-chg.t

CHANGE DETAILS




To: spectral, #hg-reviewers
Cc: quark, yuja, mjpieters, mercurial-devel

Patch

diff --git a/tests/test-chg.t b/tests/test-chg.t
--- a/tests/test-chg.t
+++ b/tests/test-chg.t
@@ -332,8 +332,8 @@ 
   YYYY/MM/DD HH:MM:SS (PID)> log -R cached
   YYYY/MM/DD HH:MM:SS (PID)> loaded repo into cache: $TESTTMP/cached (in  ...s)
 
-Test that chg works even when python "coerces" the locale (py3.7+, which is done
-by default if none of LC_ALL, LC_CTYPE, or LANG are set in the environment)
+Test that chg works (LC_CTYPE is set to the user's actual value) even when
+python "coerces" the locale (py3.7+)
 
   $ cat > $TESTTMP/debugenv.py <<EOF
   > from mercurial import encoding
@@ -347,9 +347,22 @@ 
   >         if v is not None:
   >             ui.write(b'%s=%s\n' % (k, encoding.environ[k]))
   > EOF
+(hg keeps python's modified LC_CTYPE, chg doesn't)
+  $ (unset LC_ALL; unset LANG; LC_CTYPE= "$CHGHG" \
+  >    --config extensions.debugenv=$TESTTMP/debugenv.py debugenv)
+  LC_CTYPE=C.UTF-8 (py37 !)
+  LC_CTYPE= (no-py37 !)
+  $ (unset LC_ALL; unset LANG; LC_CTYPE= chg \
+  >    --config extensions.debugenv=$TESTTMP/debugenv.py debugenv)
+  LC_CTYPE=
+  $ (unset LC_ALL; unset LANG; LC_CTYPE=unsupported_value chg \
+  >    --config extensions.debugenv=$TESTTMP/debugenv.py debugenv)
+  LC_CTYPE=unsupported_value
+  $ (unset LC_ALL; unset LANG; LC_CTYPE= chg \
+  >    --config extensions.debugenv=$TESTTMP/debugenv.py debugenv)
+  LC_CTYPE=
   $ LANG= LC_ALL= LC_CTYPE= chg \
   >    --config extensions.debugenv=$TESTTMP/debugenv.py debugenv
   LC_ALL=
-  LC_CTYPE=C.UTF-8 (py37 !)
-  LC_CTYPE= (no-py37 !)
+  LC_CTYPE=
   LANG=
diff --git a/mercurial/chgserver.py b/mercurial/chgserver.py
--- a/mercurial/chgserver.py
+++ b/mercurial/chgserver.py
@@ -550,40 +550,6 @@ 
             raise ValueError(b'unexpected value in setenv request')
         self.ui.log(b'chgserver', b'setenv: %r\n', sorted(newenv.keys()))
 
-        # Python3 has some logic to "coerce" the C locale to a UTF-8 capable
-        # one, and it sets LC_CTYPE in the environment to C.UTF-8 if none of
-        # 'LC_CTYPE', 'LC_ALL' or 'LANG' are set (to any value). This can be
-        # disabled with PYTHONCOERCECLOCALE=0 in the environment.
-        #
-        # When fromui is called via _inithashstate, python has already set
-        # this, so that's in the environment right when we start up the hg
-        # process. Then chg will call us and tell us to set the environment to
-        # the one it has; this might NOT have LC_CTYPE, so we'll need to
-        # carry-forward the LC_CTYPE that was coerced in these situations.
-        #
-        # If this is not handled, we will fail config+env validation and fail
-        # to start chg. If this is just ignored instead of carried forward, we
-        # may have different behavior between chg and non-chg.
-        if pycompat.ispy3:
-            # Rename for wordwrapping purposes
-            oldenv = encoding.environ
-            if not any(
-                e.get(b'PYTHONCOERCECLOCALE') == b'0' for e in [oldenv, newenv]
-            ):
-                keys = [b'LC_CTYPE', b'LC_ALL', b'LANG']
-                old_keys = [k for k, v in oldenv.items() if k in keys and v]
-                new_keys = [k for k, v in newenv.items() if k in keys and v]
-                # If the user's environment (from chg) doesn't have ANY of the
-                # keys that python looks for, and the environment (from
-                # initialization) has ONLY LC_CTYPE and it's set to C.UTF-8,
-                # carry it forward.
-                if (
-                    not new_keys
-                    and old_keys == [b'LC_CTYPE']
-                    and oldenv[b'LC_CTYPE'] == b'C.UTF-8'
-                ):
-                    newenv[b'LC_CTYPE'] = oldenv[b'LC_CTYPE']
-
         encoding.environ.clear()
         encoding.environ.update(newenv)
 
@@ -730,6 +696,16 @@ 
     # environ cleaner.
     if b'CHGINTERNALMARK' in encoding.environ:
         del encoding.environ[b'CHGINTERNALMARK']
+    # Python3.7+ "coerces" the LC_CTYPE environment variable to a UTF-8 one if
+    # it thinks the current value is "C". This breaks the hash computation and
+    # causes chg to enter a restart loop.
+    if b'CHGORIG_LC_CTYPE' in encoding.environ:
+        encoding.environ[b'LC_CTYPE'] = encoding.environ[b'CHGORIG_LC_CTYPE']
+        del encoding.environ[b'CHGORIG_LC_CTYPE']
+    elif b'CHG_CLEAR_LC_CTYPE' in encoding.environ:
+        if b'LC_CTYPE' in encoding.environ:
+            del encoding.environ[b'LC_CTYPE']
+        del encoding.environ[b'CHG_CLEAR_LC_CTYPE']
 
     if repo:
         # one chgserver can serve multiple repos. drop repo information
diff --git a/contrib/chg/chg.c b/contrib/chg/chg.c
--- a/contrib/chg/chg.c
+++ b/contrib/chg/chg.c
@@ -226,6 +226,16 @@ 
 	}
 	argv[argsize - 1] = NULL;
 
+	const char *lc_ctype_env = getenv("LC_CTYPE");
+	if (lc_ctype_env == NULL) {
+		if (putenv("CHG_CLEAR_LC_CTYPE=") != 0)
+			abortmsgerrno("failed to putenv CHG_CLEAR_LC_CTYPE");
+	} else {
+		if (setenv("CHGORIG_LC_CTYPE", lc_ctype_env, 1) != 0) {
+			abortmsgerrno("failed to setenv CHGORIG_LC_CTYPE");
+		}
+	}
+
 	if (putenv("CHGINTERNALMARK=") != 0)
 		abortmsgerrno("failed to putenv");
 	if (execvp(hgcmd, (char **)argv) < 0)