Patchwork [v5] cmdutil: add within-line color diff capacity

login
register
mail settings
Submitter Yuya Nishihara
Date Nov. 28, 2017, 2:09 p.m.
Message ID <20171128230900.4a25aae19e13127e6487e364@tcha.org>
Download mbox | patch
Permalink /patch/25780/
State Superseded
Headers show

Comments

Yuya Nishihara - Nov. 28, 2017, 2:09 p.m.
On Mon, 27 Nov 2017 21:38:48 +0900, matthieu.laneuville@octobus.net wrote:
> # HG changeset patch
> # User Matthieu Laneuville <matthieu.laneuville@octobus.net>
> # Date 1508944418 -32400
> #      Thu Oct 26 00:13:38 2017 +0900
> # Node ID 9a6865e011743fa354dc1ae27d66600e022cddad
> # Parent  32bb27dd52825236ba1b6c06fe60e140d6b5ea45
> # EXP-Topic inline-diff
> cmdutil: add within-line color diff capacity

Sorry for late review. I played with this patch and I like the feature, but
the output seems not so well, e.g. highlighted parts are too broad and too few.

A couple of comments follow:

> --- a/mercurial/cmdutil.py	Tue Nov 21 00:24:09 2017 -0500
> +++ b/mercurial/cmdutil.py	Thu Oct 26 00:13:38 2017 +0900
> @@ -7,6 +7,7 @@
>  
>  from __future__ import absolute_import
>  
> +import difflib
>  import errno
>  import itertools
>  import os
> @@ -1513,6 +1514,11 @@ def diffordiffstat(ui, repo, diffopts, n
>                  ui.warn(_('warning: %s not inside relative root %s\n') % (
>                      match.uipath(matchroot), uirelroot))
>  
> +    store = {
> +        'diff.inserted': [],
> +        'diff.deleted': []
> +    }
> +    status = False

Nit: 'status = None' is preferred as it isn't a boolean type.

>      if stat:
>          diffopts = diffopts.copy(context=0)
>          width = 80
> @@ -1529,7 +1535,31 @@ def diffordiffstat(ui, repo, diffopts, n
>                                           changes, diffopts, prefix=prefix,
>                                           relroot=relroot,
>                                           hunksfilterfn=hunksfilterfn):
> -            write(chunk, label=label)
> +
> +            if not ui.configbool("experimental", "inline-color-diff"):
> +                write(chunk, label=label)
> +                continue
> +
> +            # Each deleted/inserted chunk is followed by an EOL chunk with ''
> +            # label. The 'status' flag helps us grab that second line.
> +            if label in ['diff.deleted', 'diff.inserted'] or status:
> +                if status:
> +                    store[status].append(chunk)
> +                    status = False
> +                else:
> +                    store[label].append(chunk)
> +                    status = label
> +                continue
> +
> +            if store['diff.inserted'] or store['diff.deleted']:
> +                for line, l in _chunkdiff(store):
> +                    write(line, label=l)
> +
> +                store['diff.inserted'] = []
> +                store['diff.deleted'] = []
> +
> +            if chunk:
> +                write(chunk, label=label)

Can't we add a word-diff feature by hooking/replacing patch.difflabel()
function? I think this "store" hack is needed because we're post-processing
a labeled output instead of doing that at the right layer.

> +def _chunkdiff(store):
> +    '''Returns a (line, label) iterator over a corresponding deletion and
> +       insertion set. The set has to be considered as a whole in order to match
> +       lines and perform inline coloring.
> +    '''
> +    def chunkiterator(list1, list2, direction):
> +        '''For each string in list1, finds matching string in list2 and returns
> +           an iterator over their differences.
> +        '''
> +        used = []
> +        for a in list1:
> +            done = False
> +            for i, b in enumerate(list2):
> +                if done or i in used:
> +                    continue
> +                if difflib.SequenceMatcher(None, a, b).ratio() > 0.7:
> +                    buff = _inlinediff(a, b, direction=direction)
> +                    for line in buff:
> +                        yield (line[1], line[0])
> +                    done = True
> +                    used.append(i) # insure lines in b can be matched only once

It seems wrong to allow crossed matches between list1 and list2. For example,
if list1[0] matched list2[5], list1[1] shouldn't match list2[4].

> +            if not done:
> +                yield (a, 'diff.' + direction)
> +
> +    insert = store['diff.inserted']
> +    delete = store['diff.deleted']
> +    return itertools.chain(chunkiterator(delete, insert, 'deleted'),
> +                           chunkiterator(insert, delete, 'inserted'))
> +
> +def _inlinediff(from_string, to_string, direction):

Nit: name_with_underscore is banned.
https://www.mercurial-scm.org/wiki/CodingStyle#Naming_conventions

> +    '''Perform string diff to highlight specific changes.'''
> +    direction_skip = '+?' if direction == 'deleted' else '-?'
> +    if direction == 'deleted':
> +        to_string, from_string = from_string, to_string
> +
> +    # buffer required to remove last space, there may be smarter ways to do this
> +    buff = []
> +
> +    # we never want to higlight the leading +-
> +    if direction == 'deleted' and to_string.startswith('-'):
> +        buff.append(('diff.deleted', '-'))
> +        to_string = to_string[1:]
> +        from_string = from_string[1:]
> +    elif direction == 'inserted' and from_string.startswith('+'):
> +        buff.append(('diff.inserted', '+'))
> +        to_string = to_string[1:]
> +        from_string = from_string[1:]
> +
> +    s = difflib.ndiff(to_string.split(' '), from_string.split(' '))

re.split(r'(\W)', s) will produce a better result.

https://stackoverflow.com/a/2136580

FWIW, I tried the following change on top of this patch. The output was too
verbose, but seemed more correct.

Patch

diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -1587,6 +1587,11 @@  def _chunkdiff(store):
         '''For each string in list1, finds matching string in list2 and returns
            an iterator over their differences.
         '''
+        buff = _inlinediff(''.join(list1), ''.join(list2), direction=direction)
+        for line in buff:
+            yield (line[1], line[0])
+        return
+
         used = []
         for a in list1:
             done = False
@@ -1626,14 +1631,14 @@  def _inlinediff(from_string, to_string, 
         to_string = to_string[1:]
         from_string = from_string[1:]
 
-    s = difflib.ndiff(to_string.split(' '), from_string.split(' '))
+    s = difflib.ndiff(re.split(r'(\W)', to_string), re.split(r'(\W)', from_string))
     for line in s:
         if line[0] in direction_skip:
             continue
         l = 'diff.' + direction + '.highlight'
         if line[0] in ' ': # unchanged parts
             l = 'diff.' + direction
-        buff.append((l, line[2:] + ' '))
+        buff.append((l, line[2:]))
 
     buff[-1] = (buff[-1][0], buff[-1][1].strip(' '))
     return buff