Patchwork D10449: parselist: move the function from config to stringutil

login
register
mail settings
Submitter phabricator
Date April 16, 2021, 12:56 a.m.
Message ID <differential-rev-PHID-DREV-mrwhogomwep5nsz5eat3-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48767/
State Superseded
Headers show

Comments

phabricator - April 16, 2021, 12:56 a.m.
marmoute created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  We move the function into a lower-level module to avoid an import cycle. It
  now lives next to `parsebool`, which had to migrate for the same reason.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10449

AFFECTED FILES
  mercurial/config.py
  mercurial/ui.py
  mercurial/utils/stringutil.py
  rust/hg-core/src/config/config.rs
  tests/test-doctest.py

CHANGE DETAILS




To: marmoute, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/tests/test-doctest.py b/tests/test-doctest.py
--- a/tests/test-doctest.py
+++ b/tests/test-doctest.py
@@ -131,7 +131,6 @@ 
         ('mercurial.changelog', '{}'),
         ('mercurial.cmdutil', '{}'),
         ('mercurial.color', '{}'),
-        ('mercurial.config', '{}'),
         ('mercurial.dagparser', "{'optionflags': 4}"),
         ('mercurial.encoding', '{}'),
         ('mercurial.fancyopts', '{}'),
diff --git a/rust/hg-core/src/config/config.rs b/rust/hg-core/src/config/config.rs
--- a/rust/hg-core/src/config/config.rs
+++ b/rust/hg-core/src/config/config.rs
@@ -361,10 +361,11 @@ 
     ///
     /// This is appropriate for new configuration keys. The value syntax is
     /// **not** the same as most existing list-valued config, which has Python
-    /// parsing implemented in `parselist()` in `mercurial/config.py`.
-    /// Faithfully porting that parsing algorithm to Rust (including behavior
-    /// that are arguably bugs) turned out to be non-trivial and hasn’t been
-    /// completed as of this writing.
+    /// parsing implemented in `parselist()` in
+    /// `mercurial/utils/stringutil.py`. Faithfully porting that parsing
+    /// algorithm to Rust (including behavior that are arguably bugs)
+    /// turned out to be non-trivial and hasn’t been completed as of this
+    /// writing.
     ///
     /// Instead, the "simple" syntax is: split on comma, then trim leading and
     /// trailing whitespace of each component. Quotes or backslashes are not
diff --git a/mercurial/utils/stringutil.py b/mercurial/utils/stringutil.py
--- a/mercurial/utils/stringutil.py
+++ b/mercurial/utils/stringutil.py
@@ -868,6 +868,96 @@ 
     return _booleans.get(s.lower(), None)
 
 
+def parselist(value):
+    """parse a configuration value as a list of comma/space separated strings
+
+    >>> parselist(b'this,is "a small" ,test')
+    ['this', 'is', 'a small', 'test']
+    """
+
+    def _parse_plain(parts, s, offset):
+        whitespace = False
+        while offset < len(s) and (
+            s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
+        ):
+            whitespace = True
+            offset += 1
+        if offset >= len(s):
+            return None, parts, offset
+        if whitespace:
+            parts.append(b'')
+        if s[offset : offset + 1] == b'"' and not parts[-1]:
+            return _parse_quote, parts, offset + 1
+        elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
+            parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
+            return _parse_plain, parts, offset + 1
+        parts[-1] += s[offset : offset + 1]
+        return _parse_plain, parts, offset + 1
+
+    def _parse_quote(parts, s, offset):
+        if offset < len(s) and s[offset : offset + 1] == b'"':  # ""
+            parts.append(b'')
+            offset += 1
+            while offset < len(s) and (
+                s[offset : offset + 1].isspace()
+                or s[offset : offset + 1] == b','
+            ):
+                offset += 1
+            return _parse_plain, parts, offset
+
+        while offset < len(s) and s[offset : offset + 1] != b'"':
+            if (
+                s[offset : offset + 1] == b'\\'
+                and offset + 1 < len(s)
+                and s[offset + 1 : offset + 2] == b'"'
+            ):
+                offset += 1
+                parts[-1] += b'"'
+            else:
+                parts[-1] += s[offset : offset + 1]
+            offset += 1
+
+        if offset >= len(s):
+            real_parts = _configlist(parts[-1])
+            if not real_parts:
+                parts[-1] = b'"'
+            else:
+                real_parts[0] = b'"' + real_parts[0]
+                parts = parts[:-1]
+                parts.extend(real_parts)
+            return None, parts, offset
+
+        offset += 1
+        while offset < len(s) and s[offset : offset + 1] in [b' ', b',']:
+            offset += 1
+
+        if offset < len(s):
+            if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
+                parts[-1] += b'"'
+                offset += 1
+            else:
+                parts.append(b'')
+        else:
+            return None, parts, offset
+
+        return _parse_plain, parts, offset
+
+    def _configlist(s):
+        s = s.rstrip(b' ,')
+        if not s:
+            return []
+        parser, parts, offset = _parse_plain, [b''], 0
+        while parser:
+            parser, parts, offset = parser(parts, s, offset)
+        return parts
+
+    if value is not None and isinstance(value, bytes):
+        result = _configlist(value.lstrip(b' ,\n'))
+    else:
+        result = value
+    return result or []
+
+
 def evalpythonliteral(s):
     """Evaluate a string containing a Python literal expression"""
     # We could backport our tokenizer hack to rewrite '' to u'' if we want
diff --git a/mercurial/ui.py b/mercurial/ui.py
--- a/mercurial/ui.py
+++ b/mercurial/ui.py
@@ -887,10 +887,10 @@ 
         """
         # default is not always a list
         v = self.configwith(
-            config.parselist, section, name, default, b'list', untrusted
+            stringutil.parselist, section, name, default, b'list', untrusted
         )
         if isinstance(v, bytes):
-            return config.parselist(v)
+            return stringutil.parselist(v)
         elif v is None:
             return []
         return v
diff --git a/mercurial/config.py b/mercurial/config.py
--- a/mercurial/config.py
+++ b/mercurial/config.py
@@ -258,93 +258,3 @@ 
         self.parse(
             path, fp.read(), sections=sections, remap=remap, include=include
         )
-
-
-def parselist(value):
-    """parse a configuration value as a list of comma/space separated strings
-
-    >>> parselist(b'this,is "a small" ,test')
-    ['this', 'is', 'a small', 'test']
-    """
-
-    def _parse_plain(parts, s, offset):
-        whitespace = False
-        while offset < len(s) and (
-            s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
-        ):
-            whitespace = True
-            offset += 1
-        if offset >= len(s):
-            return None, parts, offset
-        if whitespace:
-            parts.append(b'')
-        if s[offset : offset + 1] == b'"' and not parts[-1]:
-            return _parse_quote, parts, offset + 1
-        elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
-            parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
-            return _parse_plain, parts, offset + 1
-        parts[-1] += s[offset : offset + 1]
-        return _parse_plain, parts, offset + 1
-
-    def _parse_quote(parts, s, offset):
-        if offset < len(s) and s[offset : offset + 1] == b'"':  # ""
-            parts.append(b'')
-            offset += 1
-            while offset < len(s) and (
-                s[offset : offset + 1].isspace()
-                or s[offset : offset + 1] == b','
-            ):
-                offset += 1
-            return _parse_plain, parts, offset
-
-        while offset < len(s) and s[offset : offset + 1] != b'"':
-            if (
-                s[offset : offset + 1] == b'\\'
-                and offset + 1 < len(s)
-                and s[offset + 1 : offset + 2] == b'"'
-            ):
-                offset += 1
-                parts[-1] += b'"'
-            else:
-                parts[-1] += s[offset : offset + 1]
-            offset += 1
-
-        if offset >= len(s):
-            real_parts = _configlist(parts[-1])
-            if not real_parts:
-                parts[-1] = b'"'
-            else:
-                real_parts[0] = b'"' + real_parts[0]
-                parts = parts[:-1]
-                parts.extend(real_parts)
-            return None, parts, offset
-
-        offset += 1
-        while offset < len(s) and s[offset : offset + 1] in [b' ', b',']:
-            offset += 1
-
-        if offset < len(s):
-            if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
-                parts[-1] += b'"'
-                offset += 1
-            else:
-                parts.append(b'')
-        else:
-            return None, parts, offset
-
-        return _parse_plain, parts, offset
-
-    def _configlist(s):
-        s = s.rstrip(b' ,')
-        if not s:
-            return []
-        parser, parts, offset = _parse_plain, [b''], 0
-        while parser:
-            parser, parts, offset = parser(parts, s, offset)
-        return parts
-
-    if value is not None and isinstance(value, bytes):
-        result = _configlist(value.lstrip(b' ,\n'))
-    else:
-        result = value
-    return result or []