Patchwork extdiff: proof of concept of refactoring to use cmdlines & avoid shlex.split

login
register
mail settings
Submitter Mads Kiilerich
Date Dec. 7, 2014, 3:25 p.m.
Message ID <7017dac8c3cd3abf1b48.1417965901@localhost.localdomain>
Download mbox | patch
Permalink /patch/7017/
State Superseded
Headers show

Comments

Mads Kiilerich - Dec. 7, 2014, 3:25 p.m.
# HG changeset patch
# User Mads Kiilerich <madski@unity3d.com>
# Date 1417965870 -3600
#      Sun Dec 07 16:24:30 2014 +0100
# Node ID 7017dac8c3cd3abf1b48f290a61e434caae754c3
# Parent  c237499a7fba65c88a2da721a22b66df4f39cf4e
extdiff: proof of concept of refactoring to use cmdlines & avoid shlex.split
Katsunori FUJIWARA - Dec. 8, 2014, 3:29 p.m.
At Sun, 07 Dec 2014 16:25:01 +0100,
Mads Kiilerich wrote:
> 
> # HG changeset patch
> # User Mads Kiilerich <madski@unity3d.com>
> # Date 1417965870 -3600
> #      Sun Dec 07 16:24:30 2014 +0100
> # Node ID 7017dac8c3cd3abf1b48f290a61e434caae754c3
> # Parent  c237499a7fba65c88a2da721a22b66df4f39cf4e
> extdiff: proof of concept of refactoring to use cmdlines & avoid shlex.split

This looks very good to me !

I added some comments below.

> diff --git a/hgext/extdiff.py b/hgext/extdiff.py
> --- a/hgext/extdiff.py
> +++ b/hgext/extdiff.py
> @@ -109,7 +109,7 @@ def snapshot(ui, repo, files, node, tmpr
>                                    os.lstat(dest).st_mtime))
>      return dirname, fns_and_mtime
>  
> -def dodiff(ui, repo, diffcmd, diffopts, pats, opts):
> +def dodiff(ui, repo, cmdline, pats, opts):
>      '''Do the actual diff:
>  
>      - copy to a temp structure if diffing 2 internal revisions
> @@ -120,8 +120,7 @@ def dodiff(ui, repo, diffcmd, diffopts, 
>  
>      revs = opts.get('rev')
>      change = opts.get('change')
> -    args = ' '.join(map(util.shellquote, diffopts))
> -    do3way = '$parent2' in args
> +    do3way = '$parent2' in cmdline
>  
>      if revs and change:
>          msg = _('cannot specify --rev and --change at the same time')
> @@ -205,25 +204,24 @@ def dodiff(ui, repo, diffcmd, diffopts, 
>              dir2 = os.path.join(dir2root, dir2, common_file)
>              label2 = common_file + rev2
>  
> -        # Function to quote file/dir names in the argument string.
> +        # Function to insert quoted file/dir names in the argument string.
>          # When not operating in 3-way mode, an empty string is
>          # returned for parent2
>          replace = {'parent': dir1a, 'parent1': dir1a, 'parent2': dir1b,
>                     'plabel1': label1a, 'plabel2': label1b,
>                     'clabel': label2, 'child': dir2,
>                     'root': repo.root}
> -        def quote(match):
> +        def replacer(match):
>              key = match.group()[1:]
>              if not do3way and key == 'parent2':
>                  return ''
>              return util.shellquote(replace[key])
>  
> -        # Match parent2 first, so 'parent1?' will match both parent1 and parent
> -        regex = '\$(parent2|parent1?|child|plabel1|plabel2|clabel|root)'
> -        if not do3way and not re.search(regex, args):
> -            args += ' $parent1 $child'
> -        args = re.sub(regex, quote, args)
> -        cmdline = util.shellquote(diffcmd) + ' ' + args
> +        # Regexps are greedy so 'parent1?' will not match 'parent2'
> +        regex = '\$(parent1?|parent2|child|plabel1|plabel2|clabel|root)'
> +        if not do3way and not re.search(regex, cmdline):
> +            cmdline += ' $parent1 $child'
> +        cmdline = re.sub(regex, replacer, cmdline)
>  
>          ui.debug('running %r in %s\n' % (cmdline, tmproot))
>          ui.system(cmdline, cwd=tmproot)

These changes seem to become more readable by dividing them into
patches (1) just integrating "diffcmd" and "diffopts" into "args", (2)
renaming from "args" to "cmdline", and (3) refactoring comments,
regular expression, function name and so on (please ignore this
comment, if these are mixed just for readability of entire PoC).


> @@ -267,43 +265,57 @@ def extdiff(ui, repo, *pats, **opts):
>      revisions are specified, the working directory files are compared
>      to its parent.'''
>      program = opts.get('program')
> -    option = opts.get('option')
> +    options = opts.get('option')
>      if not program:
>          program = 'diff'
> -        option = option or ['-Npru']
> -    return dodiff(ui, repo, program, option, pats, opts)
> +        if not options:
> +            options = ['-Npru']
> +    cmdline = ' '.join(map(util.shellquote, [program] + options))
> +    return dodiff(ui, repo, cmdline, pats, opts)
>  
>  def uisetup(ui):
>      for cmd, path in ui.configitems('extdiff'):
>          if cmd.startswith('cmd.'):
>              cmd = cmd[4:]
> -            if not path:
> -                path = util.findexe(cmd)
> -                if path is None:
> -                    path = filemerge.findexternaltool(ui, cmd) or cmd
> -            diffopts = shlex.split(ui.config('extdiff', 'opts.' + cmd, ''))
> +            exe = path
> +            if not exe:
> +                exe = util.findexe(cmd)
> +                if exe is None:
> +                    exe = filemerge.findexternaltool(ui, cmd)
> +                if exe is None:
> +                    exe = cmd
> +            cmdline = util.shellquote(exe)
> +            diffopts = ui.config('extdiff', 'opts.' + cmd, '')
> +            if diffopts:
> +                cmdline += ' ' + diffopts
>          elif cmd.startswith('opts.'):
>              continue
>          else:
> -            # command = path opts
> +            # case "cmd = exename diffoptions"
>              if path:
> -                diffopts = shlex.split(path)
> -                path = diffopts.pop(0)
> +                cmdline = path
> +                diffopts = len(shlex.split(cmdline)) > 1
>              else:
> -                path, diffopts = util.findexe(cmd), []
> -                if path is None:
> -                    path = filemerge.findexternaltool(ui, cmd) or cmd
> +                exe = util.findexe(cmd)
> +                if exe is None:
> +                    exe = filemerge.findexternaltool(ui, cmd)
> +                if exe is None:
> +                    exe = cmd
> +                cmdline = util.shellquote(exe)
> +                diffopts = False
>          # look for diff arguments in [diff-tools] then [merge-tools]
> -        if diffopts == []:
> -            args = ui.config('diff-tools', cmd+'.diffargs') or \
> -                   ui.config('merge-tools', cmd+'.diffargs')
> +        if not diffopts:
> +            args = ui.config('diff-tools', cmd + '.diffargs') or \
> +                   ui.config('merge-tools', cmd + '.diffargs')
>              if args:
> -                diffopts = shlex.split(args)
> -        def save(cmd, path, diffopts):
> +                cmdline += ' ' + args
> +        def save(cmdline):
>              '''use closure to save diff command to use'''
>              def mydiff(ui, repo, *pats, **opts):
> -                return dodiff(ui, repo, path, diffopts + opts['option'],
> -                              pats, opts)
> +                options = ' '.join(map(util.shellquote, opts['option']))
> +                if options:
> +                    options = ' ' + options
> +                return dodiff(ui, repo, cmdline + options, pats, opts)
>              doc = _('''\
>  use %(path)s to diff repository (or selected files)
>  
> @@ -325,6 +337,6 @@ use %(path)s to diff repository (or sele
>              # right encoding) prevents that.
>              mydiff.__doc__ = doc.decode(encoding.encoding)
>              return mydiff
> -        cmdtable[cmd] = (save(cmd, path, diffopts),
> +        cmdtable[cmd] = (save(cmdline),
>                           cmdtable['extdiff'][1][1:],
>                           _('hg %s [OPTION]... [FILE]...') % cmd)
> diff --git a/mercurial/posix.py b/mercurial/posix.py
> --- a/mercurial/posix.py
> +++ b/mercurial/posix.py
> @@ -8,7 +8,7 @@
>  from i18n import _
>  import encoding
>  import os, sys, errno, stat, getpass, pwd, grp, socket, tempfile, unicodedata
> -import fcntl
> +import fcntl, re
>  
>  posixfile = open
>  normpath = os.path.normpath
> @@ -312,11 +312,17 @@ if sys.platform == 'cygwin':
>      def checklink(path):
>          return False
>  
> +_needsshellquote = None
>  def shellquote(s):
> +    global _needsshellquote
> +    if _needsshellquote is None:
> +        _needsshellquote = re.compile(r'[^a-zA-Z0-9._/-]').search
>      if os.sys.platform == 'OpenVMS':
>          return '"%s"' % s
> +    elif _needsshellquote(s):
> +        return "'%s'" % s.replace("'", "'\\''")
>      else:
> -        return "'%s'" % s.replace("'", "'\\''")
> +        return s
>  
>  def quotecommand(cmd):
>      return cmd

This kind of changes should be applied also on "windows.shellquote",
shouldn't it ?


BTW, in fact, I thought that "posix.shellquote" doesn't have to check
needs of quoting, because quoting always works correctly on POSIX, at
first.

But for portability of test-extdiff.t, "posix.shellquote" seems to
have to do so, too: checking should be much cheaper than fork/exec-ing
other process.


> diff --git a/tests/test-extdiff.t b/tests/test-extdiff.t
> --- a/tests/test-extdiff.t
> +++ b/tests/test-extdiff.t
> @@ -207,7 +207,7 @@ Fallback to merge-tools.tool.executable|
>    making snapshot of 2 files from working directory
>      a
>      b
> -  running "'$TESTTMP/a/dir/tool.sh'  'a.*' 'a'" in */extdiff.* (glob)
> +  running '$TESTTMP/a/dir/tool.sh a.* a' in */extdiff.* (glob)
>    ** custom diff **
>    cleaning up temp directory
>    [1]
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel

----------------------------------------------------------------------
[FUJIWARA Katsunori]                             foozy@lares.dti.ne.jp
Mads Kiilerich - Dec. 8, 2014, 4:11 p.m.
On 12/08/2014 04:29 PM, FUJIWARA Katsunori wrote:
> These changes seem to become more readable by dividing them into
> patches (1) just integrating "diffcmd" and "diffopts" into "args", (2)
> renaming from "args" to "cmdline", and (3) refactoring comments,
> regular expression, function name and so on (please ignore this
> comment, if these are mixed just for readability of entire PoC).

Yes. It was just a proof of concept, playing around with the code to see 
if the idea could work and what cleanups it could make possible.

We still need a real and clean implementation. It would be great if you 
would pick it up.

>> --- a/mercurial/posix.py
>> +++ b/mercurial/posix.py
>> @@ -8,7 +8,7 @@
>>   from i18n import _
>>   import encoding
>>   import os, sys, errno, stat, getpass, pwd, grp, socket, tempfile, unicodedata
>> -import fcntl
>> +import fcntl, re
>>   
>>   posixfile = open
>>   normpath = os.path.normpath
>> @@ -312,11 +312,17 @@ if sys.platform == 'cygwin':
>>       def checklink(path):
>>           return False
>>   
>> +_needsshellquote = None
>>   def shellquote(s):
>> +    global _needsshellquote
>> +    if _needsshellquote is None:
>> +        _needsshellquote = re.compile(r'[^a-zA-Z0-9._/-]').search
>>       if os.sys.platform == 'OpenVMS':
>>           return '"%s"' % s
>> +    elif _needsshellquote(s):
>> +        return "'%s'" % s.replace("'", "'\\''")
>>       else:
>> -        return "'%s'" % s.replace("'", "'\\''")
>> +        return s
>>   
>>   def quotecommand(cmd):
>>       return cmd
> This kind of changes should be applied also on "windows.shellquote",
> shouldn't it ?
>
>
> BTW, in fact, I thought that "posix.shellquote" doesn't have to check
> needs of quoting, because quoting always works correctly on POSIX, at
> first.

Yes, that was just to try to get rid of annoying superfluous quoting in 
debug output. That is not directly related to the rest of the PoC with 
the approach I ended up using.

/Mads

Patch

diff --git a/hgext/extdiff.py b/hgext/extdiff.py
--- a/hgext/extdiff.py
+++ b/hgext/extdiff.py
@@ -109,7 +109,7 @@  def snapshot(ui, repo, files, node, tmpr
                                   os.lstat(dest).st_mtime))
     return dirname, fns_and_mtime
 
-def dodiff(ui, repo, diffcmd, diffopts, pats, opts):
+def dodiff(ui, repo, cmdline, pats, opts):
     '''Do the actual diff:
 
     - copy to a temp structure if diffing 2 internal revisions
@@ -120,8 +120,7 @@  def dodiff(ui, repo, diffcmd, diffopts, 
 
     revs = opts.get('rev')
     change = opts.get('change')
-    args = ' '.join(map(util.shellquote, diffopts))
-    do3way = '$parent2' in args
+    do3way = '$parent2' in cmdline
 
     if revs and change:
         msg = _('cannot specify --rev and --change at the same time')
@@ -205,25 +204,24 @@  def dodiff(ui, repo, diffcmd, diffopts, 
             dir2 = os.path.join(dir2root, dir2, common_file)
             label2 = common_file + rev2
 
-        # Function to quote file/dir names in the argument string.
+        # Function to insert quoted file/dir names in the argument string.
         # When not operating in 3-way mode, an empty string is
         # returned for parent2
         replace = {'parent': dir1a, 'parent1': dir1a, 'parent2': dir1b,
                    'plabel1': label1a, 'plabel2': label1b,
                    'clabel': label2, 'child': dir2,
                    'root': repo.root}
-        def quote(match):
+        def replacer(match):
             key = match.group()[1:]
             if not do3way and key == 'parent2':
                 return ''
             return util.shellquote(replace[key])
 
-        # Match parent2 first, so 'parent1?' will match both parent1 and parent
-        regex = '\$(parent2|parent1?|child|plabel1|plabel2|clabel|root)'
-        if not do3way and not re.search(regex, args):
-            args += ' $parent1 $child'
-        args = re.sub(regex, quote, args)
-        cmdline = util.shellquote(diffcmd) + ' ' + args
+        # Regexps are greedy so 'parent1?' will not match 'parent2'
+        regex = '\$(parent1?|parent2|child|plabel1|plabel2|clabel|root)'
+        if not do3way and not re.search(regex, cmdline):
+            cmdline += ' $parent1 $child'
+        cmdline = re.sub(regex, replacer, cmdline)
 
         ui.debug('running %r in %s\n' % (cmdline, tmproot))
         ui.system(cmdline, cwd=tmproot)
@@ -267,43 +265,57 @@  def extdiff(ui, repo, *pats, **opts):
     revisions are specified, the working directory files are compared
     to its parent.'''
     program = opts.get('program')
-    option = opts.get('option')
+    options = opts.get('option')
     if not program:
         program = 'diff'
-        option = option or ['-Npru']
-    return dodiff(ui, repo, program, option, pats, opts)
+        if not options:
+            options = ['-Npru']
+    cmdline = ' '.join(map(util.shellquote, [program] + options))
+    return dodiff(ui, repo, cmdline, pats, opts)
 
 def uisetup(ui):
     for cmd, path in ui.configitems('extdiff'):
         if cmd.startswith('cmd.'):
             cmd = cmd[4:]
-            if not path:
-                path = util.findexe(cmd)
-                if path is None:
-                    path = filemerge.findexternaltool(ui, cmd) or cmd
-            diffopts = shlex.split(ui.config('extdiff', 'opts.' + cmd, ''))
+            exe = path
+            if not exe:
+                exe = util.findexe(cmd)
+                if exe is None:
+                    exe = filemerge.findexternaltool(ui, cmd)
+                if exe is None:
+                    exe = cmd
+            cmdline = util.shellquote(exe)
+            diffopts = ui.config('extdiff', 'opts.' + cmd, '')
+            if diffopts:
+                cmdline += ' ' + diffopts
         elif cmd.startswith('opts.'):
             continue
         else:
-            # command = path opts
+            # case "cmd = exename diffoptions"
             if path:
-                diffopts = shlex.split(path)
-                path = diffopts.pop(0)
+                cmdline = path
+                diffopts = len(shlex.split(cmdline)) > 1
             else:
-                path, diffopts = util.findexe(cmd), []
-                if path is None:
-                    path = filemerge.findexternaltool(ui, cmd) or cmd
+                exe = util.findexe(cmd)
+                if exe is None:
+                    exe = filemerge.findexternaltool(ui, cmd)
+                if exe is None:
+                    exe = cmd
+                cmdline = util.shellquote(exe)
+                diffopts = False
         # look for diff arguments in [diff-tools] then [merge-tools]
-        if diffopts == []:
-            args = ui.config('diff-tools', cmd+'.diffargs') or \
-                   ui.config('merge-tools', cmd+'.diffargs')
+        if not diffopts:
+            args = ui.config('diff-tools', cmd + '.diffargs') or \
+                   ui.config('merge-tools', cmd + '.diffargs')
             if args:
-                diffopts = shlex.split(args)
-        def save(cmd, path, diffopts):
+                cmdline += ' ' + args
+        def save(cmdline):
             '''use closure to save diff command to use'''
             def mydiff(ui, repo, *pats, **opts):
-                return dodiff(ui, repo, path, diffopts + opts['option'],
-                              pats, opts)
+                options = ' '.join(map(util.shellquote, opts['option']))
+                if options:
+                    options = ' ' + options
+                return dodiff(ui, repo, cmdline + options, pats, opts)
             doc = _('''\
 use %(path)s to diff repository (or selected files)
 
@@ -325,6 +337,6 @@  use %(path)s to diff repository (or sele
             # right encoding) prevents that.
             mydiff.__doc__ = doc.decode(encoding.encoding)
             return mydiff
-        cmdtable[cmd] = (save(cmd, path, diffopts),
+        cmdtable[cmd] = (save(cmdline),
                          cmdtable['extdiff'][1][1:],
                          _('hg %s [OPTION]... [FILE]...') % cmd)
diff --git a/mercurial/posix.py b/mercurial/posix.py
--- a/mercurial/posix.py
+++ b/mercurial/posix.py
@@ -8,7 +8,7 @@ 
 from i18n import _
 import encoding
 import os, sys, errno, stat, getpass, pwd, grp, socket, tempfile, unicodedata
-import fcntl
+import fcntl, re
 
 posixfile = open
 normpath = os.path.normpath
@@ -312,11 +312,17 @@  if sys.platform == 'cygwin':
     def checklink(path):
         return False
 
+_needsshellquote = None
 def shellquote(s):
+    global _needsshellquote
+    if _needsshellquote is None:
+        _needsshellquote = re.compile(r'[^a-zA-Z0-9._/-]').search
     if os.sys.platform == 'OpenVMS':
         return '"%s"' % s
+    elif _needsshellquote(s):
+        return "'%s'" % s.replace("'", "'\\''")
     else:
-        return "'%s'" % s.replace("'", "'\\''")
+        return s
 
 def quotecommand(cmd):
     return cmd
diff --git a/tests/test-extdiff.t b/tests/test-extdiff.t
--- a/tests/test-extdiff.t
+++ b/tests/test-extdiff.t
@@ -207,7 +207,7 @@  Fallback to merge-tools.tool.executable|
   making snapshot of 2 files from working directory
     a
     b
-  running "'$TESTTMP/a/dir/tool.sh'  'a.*' 'a'" in */extdiff.* (glob)
+  running '$TESTTMP/a/dir/tool.sh a.* a' in */extdiff.* (glob)
   ** custom diff **
   cleaning up temp directory
   [1]