Patchwork D8281: narrow: escape includepats/excludepats when sending over the wire

login
register
mail settings
Submitter phabricator
Date March 17, 2020, 9:59 p.m.
Message ID <205a92cb79715198d2cfd1078a251f53@localhost.localdomain>
Download mbox | patch
Permalink /patch/45815/
State Not Applicable
Headers show

Comments

phabricator - March 17, 2020, 9:59 p.m.
spectral updated this revision to Diff 20819.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D8281?vs=20818&id=20819

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8281/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8281

AFFECTED FILES
  hgext/narrow/narrowbundle2.py
  hgext/narrow/narrowwirepeer.py
  mercurial/wireprototypes.py
  mercurial/wireprotov1peer.py
  mercurial/wireprotov1server.py
  relnotes/next
  tests/test-doctest.py
  tests/test-narrow-exchange.t

CHANGE DETAILS




To: spectral, durin42, martinvonz, #hg-reviewers, marmoute
Cc: marmoute, mharbison72, mercurial-devel

Patch

diff --git a/tests/test-narrow-exchange.t b/tests/test-narrow-exchange.t
--- a/tests/test-narrow-exchange.t
+++ b/tests/test-narrow-exchange.t
@@ -223,3 +223,62 @@ 
   remote: rollback completed (lfs-on !)
   remote: abort: data/inside2/f.i@f59b4e021835: no match found! (lfs-on !)
   abort: stream ended unexpectedly (got 0 bytes, expected 4) (lfs-on !)
+
+Test paths with commas in them
+  $ cd $TESTTMP
+  $ hg init commas-master
+  $ cd commas-master
+  $ mkdir a,b
+  $ mkdir a,b/c,d
+  $ mkdir a,b/e,f
+  $ mkdir g
+  $ echo abcd > a,b/c,d/abcd
+  $ echo abef > a,b/e,f/abef
+  $ echo ghi > g/h,i
+  $ hg ci -qAm r0
+  $ echo abcd2 >> a,b/c,d/abcd
+  $ echo abef2 >> a,b/e,f/abef
+  $ echo ghi2 >> g/h,i
+  $ hg ci -qm r1
+  $ cd ..
+
+Test that we can pull and push with a file that has a comma in the name, even
+though the commas don't appear in the narrowspec file (since they're just
+filenames)
+  $ hg clone --narrow ssh://user@dummy/commas-master commas-in-file \
+  >   --include g -qr 0
+  $ cd commas-in-file
+  $ hg pull -q
+  $ echo ghi3 >> g/h,i
+  $ hg ci -qm 'modify g/h,i'
+  $ hg push -qf
+  $ cd ..
+
+Test commas in the --include, plus pull+push
+  $ hg clone --narrow ssh://user@dummy/commas-master commas-in-dir \
+  >   --include a,b --exclude a,b/c,d -qr 0
+  $ cd commas-in-dir
+  $ hg pull -q
+  $ echo abef3 >> a,b/e,f/abef
+  $ hg ci -qm 'modify a,b/e,f'
+  $ hg push -qf
+
+Test that --{add,remove}{include,exclude} work with commas in the directory
+names.
+  $ hg tracked
+  I path:a,b
+  X path:a,b/c,d
+  $ hg tracked --removeexclude a,b/c,d --addinclude a,b/e,f -q
+  $ hg tracked
+  I path:a,b
+  I path:a,b/e,f
+  $ hg files
+  a,b/c,d/abcd
+  a,b/e,f/abef
+  $ hg tracked --removeinclude a,b/e,f --addexclude a,b/c,d -q
+  $ hg tracked
+  I path:a,b
+  X path:a,b/c,d
+  $ hg files
+  a,b/e,f/abef
+  $ cd ..
diff --git a/tests/test-doctest.py b/tests/test-doctest.py
--- a/tests/test-doctest.py
+++ b/tests/test-doctest.py
@@ -137,6 +137,7 @@ 
         ('mercurial.revlogutils.deltas', '{}'),
         ('mercurial.revset', '{}'),
         ('mercurial.revsetlang', '{}'),
+        ('mercurial.wireprototypes', '{}'),
         ('mercurial.simplemerge', '{}'),
         ('mercurial.smartset', '{}'),
         ('mercurial.store', '{}'),
diff --git a/relnotes/next b/relnotes/next
--- a/relnotes/next
+++ b/relnotes/next
@@ -29,6 +29,10 @@ 
  * `hg debugmergestate` output is now templated, which may be useful
    e.g. for IDEs that want to help the user resolve merge conflicts.
 
+ * The experimental `narrow` extension will now be able to have include or
+   exclude patterns that have a comma in the name when both client and server
+   are updated.
+
 
 == New Experimental Features ==
 
diff --git a/mercurial/wireprotov1server.py b/mercurial/wireprotov1server.py
--- a/mercurial/wireprotov1server.py
+++ b/mercurial/wireprotov1server.py
@@ -448,6 +448,8 @@ 
             opts[k] = list(v.split(b','))
         elif keytype == b'scsv':
             opts[k] = set(v.split(b','))
+        elif keytype == b'qcsv':
+            opts[k] = wireprototypes.decode_qcsv(v)
         elif keytype == b'boolean':
             # Client should serialize False as '0', which is a non-empty string
             # so it evaluates as a True bool.
diff --git a/mercurial/wireprotov1peer.py b/mercurial/wireprotov1peer.py
--- a/mercurial/wireprotov1peer.py
+++ b/mercurial/wireprotov1peer.py
@@ -462,6 +462,8 @@ 
                 value = b','.join(value)
             elif keytype == b'scsv':
                 value = b','.join(sorted(value))
+            elif keytype == b'qcsv':
+                value = wireprototypes.encode_qcsv(value)
             elif keytype == b'boolean':
                 value = b'%i' % bool(value)
             elif keytype != b'plain':
diff --git a/mercurial/wireprototypes.py b/mercurial/wireprototypes.py
--- a/mercurial/wireprototypes.py
+++ b/mercurial/wireprototypes.py
@@ -161,6 +161,7 @@ 
 # :nodes: list of binary nodes, transmitted as space-separated hex nodes
 # :csv:   list of values, transmitted as comma-separated values
 # :scsv:  set of values, transmitted as comma-separated values
+# :qcsv:  list of values, transmitted as quote-escaped comma-separated values
 # :plain: string with no transformation needed.
 GETBUNDLE_ARGUMENTS = {
     b'heads': b'nodes',
@@ -173,11 +174,194 @@ 
     b'cg': b'boolean',
     b'cbattempted': b'boolean',
     b'stream': b'boolean',
-    b'includepats': b'csv',
-    b'excludepats': b'csv',
+    b'includepats': b'qcsv',
+    b'excludepats': b'qcsv',
 }
 
 
+def encode_qcsv(paths):
+    r'''escape and join a value of type 'qcsv', producing a bytes object
+
+    This produces an RFC 4180-like encoding, with the primary difference being
+    that we allow and produce items that have an embedded " character without
+    forcing the item to be completely surrounded by " characters. That is,
+    b'a"b' serializes as b'a"b', not b'"a""b"'. We also do not treat \r or \n
+    special in any way.
+
+    If an item starts with a " character or has a , character in it, the entire
+    item is escaped.
+
+    >>> from mercurial.pycompat import sysstr
+    >>> def check(paths):
+    ...     return sysstr(encode_qcsv(paths))
+    >>> check([b'a', b'b', b'c'])
+    'a,b,c'
+    >>> check([b'a"b', b'c'])
+    'a"b,c'
+    >>> check([b'a,b', b'c'])
+    '"a,b",c'
+    >>> check([b'a,b,c'])
+    '"a,b,c"'
+    >>> check([b'a"b,"'])
+    '"a""b,"""'
+    >>> check([b'a"cb', b'"'])
+    'a"cb,""""'
+    >>> check([b'"'])
+    '""""'
+    >>> check([b'', b''])
+    ','
+    >>> check([b'', b'', b'', b''])
+    ',,,'
+    >>> check([b','])
+    '","'
+    >>> check([b'",,",""'])
+    '""",,"","""""'
+    >>> check([])
+    ''
+    >>> check([b''])
+    ''
+    '''
+
+    def maybequote(p):
+        if p.startswith(b'"') or b',' in p:
+            return b'"%s"' % p.replace(b'"', b'""')
+        return p
+
+    return b','.join([maybequote(p) for p in paths])
+
+
+def _commasplit(s):
+    '''Split a csv value, doing RFC 4180-like unescaping.
+
+    If an item in the csv list is escaped, the whole item will be enclosed in "
+    characters. Embedded " characters in these strings are represented by two
+    sequential " characters.
+
+    If the item does not start with a " character, it is returned as-is
+    (including any embedded " characters). If the item starts with a " character
+    but does not end in one, we will remove the " and the final character of the
+    string, behavior in this situation is not guaranteed to be stable.
+
+    >>> from mercurial.pycompat import sysstr
+    >>> def check(s):
+    ...     return list([sysstr(x) for x in _commasplit(s)])
+    >>> check(b'a,b,c')
+    ['a', 'b', 'c']
+    >>> check(b'a"b,c')
+    ['a"b', 'c']
+    >>> check(b'"a,b",c')
+    ['a,b', 'c']
+    >>> check(b'"a,b,c"')
+    ['a,b,c']
+    >>> check(b'"a""b,"""')
+    ['a"b,"']
+    >>> check(b'""')
+    ['']
+    >>> check(b'""""')
+    ['"']
+    >>> check(b'')
+    ['']
+    >>> check(b',')
+    ['', '']
+    >>> check(b',,,')
+    ['', '', '', '']
+    >>> check(b'","')
+    [',']
+    >>> check(b'""",,"","""""')
+    ['",,",""']
+    >>> # These are invalid, so just make sure it doesn't crash/hang, the actual
+    >>> # value is essentially irrelevant.
+    >>> check(b'"')
+    ['']
+    >>> check(b',",')
+    ['', '']
+    >>> # Note the missing 'c'.
+    >>> check(b'"abc')
+    ['ab']
+    '''
+
+    startpos = 0
+    while True:  # loop over all comma-separated items
+        quoted = False
+        if s[startpos : startpos + 1] == b'"':
+            quoted = True
+            startpos += 1
+        searchpos = startpos
+        while True:  # loop over all embedded commas
+            commapos = s.find(b',', searchpos)
+            if commapos < 0:
+                if quoted:
+                    yield s[startpos:-1].replace(b'""', b'"')
+                else:
+                    yield s[startpos:]
+                return
+            else:
+                if quoted:
+                    if s[startpos:commapos].count(b'"') % 2 == 0:
+                        searchpos = commapos + 1
+                        continue  # find another comma, this one is quoted
+                    yield s[startpos : commapos - 1].replace(b'""', b'"')
+
+                elif commapos == startpos:
+                    yield ''
+                else:
+                    yield s[startpos:commapos]
+                startpos = commapos + 1
+                break  # Stop searching for embedded commas, go to next item
+
+
+def decode_qcsv(s):
+    r'''decode an value of type 'qcsv', producing a list
+
+    If `s` is an empty string, decodes to an empty list.
+
+    >>> from mercurial.pycompat import sysstr
+    >>> def check(s):
+    ...     return list([sysstr(x) for x in decode_qcsv(s)])
+    >>> check(b'a,b,c')
+    ['a', 'b', 'c']
+    >>> check(b'a"b,c')
+    ['a"b', 'c']
+    >>> check(b'"a,b",c')
+    ['a,b', 'c']
+    >>> check(b'"a,b,c"')
+    ['a,b,c']
+    >>> check(b'"a""b,"""')
+    ['a"b,"']
+    >>> check(b'""')
+    ['']
+    >>> check(b'""""')
+    ['"']
+    >>> check(b'')
+    []
+    >>> check(b',')
+    ['', '']
+    >>> check(b',,,')
+    ['', '', '', '']
+    >>> check(b'","')
+    [',']
+    >>> check(b'""",,"","""""')
+    ['",,",""']
+    >>> # These are invalid, so just make sure it doesn't crash/hang, the actual
+    >>> # value is essentially irrelevant.
+    >>> check(b'"')
+    ['']
+    >>> check(b',",')
+    ['', '']
+    >>> # Note the missing 'c'.
+    >>> check(b'"abc')
+    ['ab']
+    '''
+    if not s:
+        return []
+
+    if s[0:1] != b'"' and b',"' not in s:
+        # fast path
+        return s.split(b',')
+
+    return list(_commasplit(s))
+
+
 class baseprotocolhandler(interfaceutil.Interface):
     """Abstract base class for wire protocol handlers.
 
diff --git a/hgext/narrow/narrowwirepeer.py b/hgext/narrow/narrowwirepeer.py
--- a/hgext/narrow/narrowwirepeer.py
+++ b/hgext/narrow/narrowwirepeer.py
@@ -79,14 +79,10 @@ 
     preferuncompressed = False
     try:
 
-        def splitpaths(data):
-            # work around ''.split(',') => ['']
-            return data.split(b',') if data else []
-
-        oldincludes = splitpaths(oldincludes)
-        newincludes = splitpaths(newincludes)
-        oldexcludes = splitpaths(oldexcludes)
-        newexcludes = splitpaths(newexcludes)
+        oldincludes = wireprototypes.decode_qcsv(oldincludes)
+        newincludes = wireprototypes.decode_qcsv(newincludes)
+        oldexcludes = wireprototypes.decode_qcsv(oldexcludes)
+        newexcludes = wireprototypes.decode_qcsv(newexcludes)
         # validate the patterns
         narrowspec.validatepatterns(set(oldincludes))
         narrowspec.validatepatterns(set(newincludes))
@@ -143,7 +139,7 @@ 
         kwargs[ch] = wireprototypes.encodelist(kwargs[ch])
 
     for ch in ('oldincludes', 'newincludes', 'oldexcludes', 'newexcludes'):
-        kwargs[ch] = b','.join(kwargs[ch])
+        kwargs[ch] = wireprototypes.encode_qcsv(kwargs[ch])
 
     kwargs['ellipses'] = b'%i' % bool(kwargs['ellipses'])
     f = remote._callcompressable(b'narrow_widen', **kwargs)
diff --git a/hgext/narrow/narrowbundle2.py b/hgext/narrow/narrowbundle2.py
--- a/hgext/narrow/narrowbundle2.py
+++ b/hgext/narrow/narrowbundle2.py
@@ -304,8 +304,8 @@ 
 
     getbundleargs[b'narrow'] = b'boolean'
     getbundleargs[b'depth'] = b'plain'
-    getbundleargs[b'oldincludepats'] = b'csv'
-    getbundleargs[b'oldexcludepats'] = b'csv'
+    getbundleargs[b'oldincludepats'] = b'qcsv'
+    getbundleargs[b'oldexcludepats'] = b'qcsv'
     getbundleargs[b'known'] = b'csv'
 
     # Extend changegroup serving to handle requests from narrow clients.