Patchwork D8281: narrow: escape includepats/excludepats when sending over the wire (BC)

login
register
mail settings
Submitter phabricator
Date March 13, 2020, 5:21 a.m.
Message ID <8484f530f6c84d9c29b2bda52af5c15b@localhost.localdomain>
Download mbox | patch
Permalink /patch/45763/
State Not Applicable
Headers show

Comments

phabricator - March 13, 2020, 5:21 a.m.
spectral updated this revision to Diff 20767.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D8281?vs=20765&id=20767

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8281/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8281

AFFECTED FILES
  hgext/narrow/narrowbundle2.py
  hgext/narrow/narrowwirepeer.py
  mercurial/wireprototypes.py
  mercurial/wireprotov1peer.py
  mercurial/wireprotov1server.py
  relnotes/next
  tests/test-doctest.py
  tests/test-narrow-exchange.t

CHANGE DETAILS




To: spectral, durin42, martinvonz, #hg-reviewers
Cc: mharbison72, mercurial-devel

Patch

diff --git a/tests/test-narrow-exchange.t b/tests/test-narrow-exchange.t
--- a/tests/test-narrow-exchange.t
+++ b/tests/test-narrow-exchange.t
@@ -223,3 +223,62 @@ 
   remote: rollback completed (lfs-on !)
   remote: abort: data/inside2/f.i@f59b4e021835: no match found! (lfs-on !)
   abort: stream ended unexpectedly (got 0 bytes, expected 4) (lfs-on !)
+
+Test paths with commas in them
+  $ cd $TESTTMP
+  $ hg init commas-master
+  $ cd commas-master
+  $ mkdir a,b
+  $ mkdir a,b/c,d
+  $ mkdir a,b/e,f
+  $ mkdir g
+  $ echo abcd > a,b/c,d/abcd
+  $ echo abef > a,b/e,f/abef
+  $ echo ghi > g/h,i
+  $ hg ci -qAm r0
+  $ echo abcd2 >> a,b/c,d/abcd
+  $ echo abef2 >> a,b/e,f/abef
+  $ echo ghi2 >> g/h,i
+  $ hg ci -qm r1
+  $ cd ..
+
+Test that we can pull and push with a file that has a comma in the name, even
+though the commas don't appear in the narrowspec file (since they're just
+filenames)
+  $ hg clone --narrow ssh://user@dummy/commas-master commas-in-file \
+  >   --include g -qr 0
+  $ cd commas-in-file
+  $ hg pull -q
+  $ echo ghi3 >> g/h,i
+  $ hg ci -qm 'modify g/h,i'
+  $ hg push -qf
+  $ cd ..
+
+Test commas in the --include, plus pull+push
+  $ hg clone --narrow ssh://user@dummy/commas-master commas-in-dir \
+  >   --include a,b --exclude a,b/c,d -qr 0
+  $ cd commas-in-dir
+  $ hg pull -q
+  $ echo abef3 >> a,b/e,f/abef
+  $ hg ci -qm 'modify a,b/e,f'
+  $ hg push -qf
+
+Test that --{add,remove}{include,exclude} work with commas in the directory
+names.
+  $ hg tracked
+  I path:a,b
+  X path:a,b/c,d
+  $ hg tracked --removeexclude a,b/c,d --addinclude a,b/e,f -q
+  $ hg tracked
+  I path:a,b
+  I path:a,b/e,f
+  $ hg files
+  a,b/c,d/abcd
+  a,b/e,f/abef
+  $ hg tracked --removeinclude a,b/e,f --addexclude a,b/c,d -q
+  $ hg tracked
+  I path:a,b
+  X path:a,b/c,d
+  $ hg files
+  a,b/e,f/abef
+  $ cd ..
diff --git a/tests/test-doctest.py b/tests/test-doctest.py
--- a/tests/test-doctest.py
+++ b/tests/test-doctest.py
@@ -79,6 +79,7 @@ 
 testmod('mercurial.util', testtarget='platform')  # windows.py or posix.py
 testmod('mercurial.utils.dateutil')
 testmod('mercurial.utils.stringutil')
+testmod('mercurial.wireprototypes')
 testmod('hgext.convert.convcmd')
 testmod('hgext.convert.cvsps')
 testmod('hgext.convert.filemap')
diff --git a/relnotes/next b/relnotes/next
--- a/relnotes/next
+++ b/relnotes/next
@@ -23,6 +23,10 @@ 
    Will use `zstd` compression for new repositories is available, and will
    simply fall back to `zlib` if not.
 
+ * The experimental `narrow` extension will now be able to have include or
+   exclude patterns that have a comma in the name when both client and server
+   are updated.
+
 == New Experimental Features ==
 
  * `hg copy` now supports a `--at-rev` argument to mark files as
@@ -49,6 +53,15 @@ 
  * `hg recover` does not verify the validity of the whole repository
    anymore. You can pass `--verify` or call `hg verify` if necessary.
 
+ * The experimental `narrow` extension wireproto serialization format is
+   changing slightly. Windows clients will transmit paths with `/` as the path
+   separator instead of `\`. Any `,` or `\` in the path for include or exclude
+   patterns will be escaped. Newer servers speaking with older clients may
+   accidentally unescape paths that weren't actually escaped by the client, but
+   this is extremely unlikely to happen in practice. Newer clients speaking with
+   older servers will escape any `\` character and this will not be interpreted
+   properly by the server, causing the path to not match.
+
 == Internal API Changes ==
 
  * The deprecated `ui.progress()` has now been deleted. Please use
diff --git a/mercurial/wireprotov1server.py b/mercurial/wireprotov1server.py
--- a/mercurial/wireprotov1server.py
+++ b/mercurial/wireprotov1server.py
@@ -448,6 +448,8 @@ 
             opts[k] = list(v.split(b','))
         elif keytype == b'scsv':
             opts[k] = set(v.split(b','))
+        elif keytype == b'csvpaths':
+            opts[k] = wireprototypes.decodecsvpaths(v)
         elif keytype == b'boolean':
             # Client should serialize False as '0', which is a non-empty string
             # so it evaluates as a True bool.
diff --git a/mercurial/wireprotov1peer.py b/mercurial/wireprotov1peer.py
--- a/mercurial/wireprotov1peer.py
+++ b/mercurial/wireprotov1peer.py
@@ -462,6 +462,8 @@ 
                 value = b','.join(value)
             elif keytype == b'scsv':
                 value = b','.join(sorted(value))
+            elif keytype == b'csvpaths':
+                value = wireprototypes.encodecsvpaths(value)
             elif keytype == b'boolean':
                 value = b'%i' % bool(value)
             elif keytype != b'plain':
diff --git a/mercurial/wireprototypes.py b/mercurial/wireprototypes.py
--- a/mercurial/wireprototypes.py
+++ b/mercurial/wireprototypes.py
@@ -161,6 +161,8 @@ 
 # :nodes: list of binary nodes, transmitted as space-separated hex nodes
 # :csv:   list of values, transmitted as comma-separated values
 # :scsv:  set of values, transmitted as comma-separated values
+# :csvpaths: list of values, transmitted as comma-separated values individually
+#            escaped with `encodecsvpaths`
 # :plain: string with no transformation needed.
 GETBUNDLE_ARGUMENTS = {
     b'heads': b'nodes',
@@ -173,11 +175,85 @@ 
     b'cg': b'boolean',
     b'cbattempted': b'boolean',
     b'stream': b'boolean',
-    b'includepats': b'csv',
-    b'excludepats': b'csv',
+    b'includepats': b'csvpaths',
+    b'excludepats': b'csvpaths',
 }
 
 
+def encodecsvpaths(paths):
+    r'''escape and join a value of type 'csvpaths', producing a bytes object
+
+    pconvert (so for windows clients, paths like a\b -> a/b, while non-windows
+    clients allow a\b through without issue). Then replace `\` with `\s,` then
+    replace `,` with `\c`.
+
+    >>> from mercurial.pycompat import sysstr
+    >>> def check(paths):
+    ...     return sysstr(encodecsvpaths(paths))
+    >>> encodecsvpaths([b'a', b'b', b'c'])
+    'a,b,c'
+    >>> encodecsvpaths([br'a\b', b'c'])
+    'a\\SlAsHb,c'
+    >>> encodecsvpaths([b'a,b', b'c'])
+    'a\\CoMmAb,c'
+    >>> encodecsvpaths([b'a\\cb,\\'])
+    'a\\SlAsHcb\\CoMmA\\SlAsH'
+    >>> encodecsvpaths([b','])
+    '\\CoMmA'
+    >>> encodecsvpaths([b'\\'])
+    '\\SlAsH'
+    >>> encodecsvpaths([])
+    ''
+    >>> encodecsvpaths([b''])
+    ''
+    '''
+    return b','.join(
+        [
+            util.pconvert(p)
+            .replace(b'\\', br'\SlAsH')
+            .replace(b',', br'\CoMmA')
+            for p in paths
+        ]
+    )
+
+
+def decodecsvpaths(s):
+    r'''decode an value of type 'csvpaths', producing a list
+
+    If `s` is an empty string, decodes to an empty list.
+
+    >>> from mercurial.pycompat import sysstr
+    >>> def check(s):
+    ...     return [sysstr(x) for x in decodecsvpaths(s)]
+    >>> check(br'a,b,c')
+    ['a', 'b', 'c']
+    >>> check(br'a\SlAsHb,c')
+    ['a\\b', 'c']
+    >>> check(br'a\CoMmAb,c')
+    ['a,b', 'c']
+    >>> check(br'a\SlAsHcb\CoMmA\SlAsH')
+    ['a\\cb,\\']
+    >>> check(br'\CoMmA')
+    [',']
+    >>> check(br'\SlAsH')
+    ['\\']
+    >>> check(br'')
+    []
+
+    >>> # We may receive sequences like this from old clients.
+    >>> check(br'\x')
+    ['\\x']
+    '''
+    return (
+        [
+            p.replace(br'\CoMmA', b',').replace(br'\SlAsH', b'\\')
+            for p in s.split(b',')
+        ]
+        if s
+        else []
+    )
+
+
 class baseprotocolhandler(interfaceutil.Interface):
     """Abstract base class for wire protocol handlers.
 
diff --git a/hgext/narrow/narrowwirepeer.py b/hgext/narrow/narrowwirepeer.py
--- a/hgext/narrow/narrowwirepeer.py
+++ b/hgext/narrow/narrowwirepeer.py
@@ -79,14 +79,10 @@ 
     preferuncompressed = False
     try:
 
-        def splitpaths(data):
-            # work around ''.split(',') => ['']
-            return data.split(b',') if data else []
-
-        oldincludes = splitpaths(oldincludes)
-        newincludes = splitpaths(newincludes)
-        oldexcludes = splitpaths(oldexcludes)
-        newexcludes = splitpaths(newexcludes)
+        oldincludes = wireprototypes.decodecsvpaths(oldincludes)
+        newincludes = wireprototypes.decodecsvpaths(newincludes)
+        oldexcludes = wireprototypes.decodecsvpaths(oldexcludes)
+        newexcludes = wireprototypes.decodecsvpaths(newexcludes)
         # validate the patterns
         narrowspec.validatepatterns(set(oldincludes))
         narrowspec.validatepatterns(set(newincludes))
@@ -143,7 +139,7 @@ 
         kwargs[ch] = wireprototypes.encodelist(kwargs[ch])
 
     for ch in ('oldincludes', 'newincludes', 'oldexcludes', 'newexcludes'):
-        kwargs[ch] = b','.join(kwargs[ch])
+        kwargs[ch] = wireprototypes.encodecsvpaths(kwargs[ch])
 
     kwargs['ellipses'] = b'%i' % bool(kwargs['ellipses'])
     f = remote._callcompressable(b'narrow_widen', **kwargs)
diff --git a/hgext/narrow/narrowbundle2.py b/hgext/narrow/narrowbundle2.py
--- a/hgext/narrow/narrowbundle2.py
+++ b/hgext/narrow/narrowbundle2.py
@@ -304,8 +304,8 @@ 
 
     getbundleargs[b'narrow'] = b'boolean'
     getbundleargs[b'depth'] = b'plain'
-    getbundleargs[b'oldincludepats'] = b'csv'
-    getbundleargs[b'oldexcludepats'] = b'csv'
+    getbundleargs[b'oldincludepats'] = b'csvpaths'
+    getbundleargs[b'oldexcludepats'] = b'csvpaths'
     getbundleargs[b'known'] = b'csv'
 
     # Extend changegroup serving to handle requests from narrow clients.