Patchwork D1941: mdiff: explicitly compute places for the newline marker

login
register
mail settings
Submitter phabricator
Date Jan. 26, 2018, 1:35 a.m.
Message ID <differential-rev-PHID-DREV-qtqgw2rfoqo7gfio7bzt-req@phab.mercurial-scm.org>
Download mbox | patch
Permalink /patch/27064/
State Superseded
Headers show

Comments

phabricator - Jan. 26, 2018, 1:35 a.m.
joerg.sonnenberger created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1941

AFFECTED FILES
  mercurial/mdiff.py

CHANGE DETAILS




To: joerg.sonnenberger, #hg-reviewers
Cc: mercurial-devel
phabricator - Jan. 26, 2018, 1:36 a.m.
joerg.sonnenberger added a comment.


  I don't completely trust my numbers here, but it seems to give at least 3-4% for diffing netbsd-7 and netbsd-8.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1941

To: joerg.sonnenberger, #hg-reviewers
Cc: mercurial-devel
phabricator - Jan. 26, 2018, 5 a.m.
durin42 added a comment.


  3-4% is in line with what I'm seeing with ./hg perfunidiff mercurial/manifest.py 0 --count 500 --profile, and this looks like a better fix than what I've got. I'm a little weirded out by the change in interface where _unidiff yields a bool and then a sequence of hunks - was that needed for the change to work? I'm having trouble telling.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1941

To: joerg.sonnenberger, #hg-reviewers
Cc: durin42, mercurial-devel
phabricator - Jan. 26, 2018, 10:53 a.m.
joerg.sonnenberger added a comment.


  No, the change to yield a bool first is not strictly necessary for the rest of the logic. But it cheaply kills the need for the `rewindhunk` generator, i.e. it cuts a layer of indirection cheaply.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1941

To: joerg.sonnenberger, #hg-reviewers
Cc: durin42, mercurial-devel
phabricator - Jan. 26, 2018, 3:13 p.m.
durin42 added a comment.


  I'd slightly prefer this as two patches: one that does away with the rewindhunks goo (yay!), and then one that does the book-keeping to intelligently emit the "no newline" business. Can you tackle that?

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1941

To: joerg.sonnenberger, #hg-reviewers
Cc: durin42, mercurial-devel

Patch

diff --git a/mercurial/mdiff.py b/mercurial/mdiff.py
--- a/mercurial/mdiff.py
+++ b/mercurial/mdiff.py
@@ -19,6 +19,8 @@ 
     util,
 )
 
+_missing_newline_marker = "\\ No newline at end of file\n"
+
 bdiff = policy.importmod(r'bdiff')
 mpatch = policy.importmod(r'mpatch')
 
@@ -267,18 +269,13 @@ 
     fn1 = util.pconvert(fn1)
     fn2 = util.pconvert(fn2)
 
-    def checknonewline(lines):
-        for text in lines:
-            if text[-1:] != '\n':
-                text += "\n\ No newline at end of file\n"
-            yield text
-
     if not opts.text and check_binary and (util.binary(a) or util.binary(b)):
         if a and b and len(a) == len(b) and a == b:
             return sentinel
         headerlines = []
         hunks = (None, ['Binary file %s has changed\n' % fn1]),
     elif not a:
+        without_newline = b[-1] != '\n'
         b = splitnewlines(b)
         if a is None:
             l1 = '--- /dev/null%s' % datetag(epoch)
@@ -289,8 +286,12 @@ 
         size = len(b)
         hunkrange = (0, 0, 1, size)
         hunklines = ["@@ -0,0 +1,%d @@\n" % size] + ["+" + e for e in b]
-        hunks = (hunkrange, checknonewline(hunklines)),
+        if without_newline:
+            hunklines[-1] += '\n'
+            hunklines.append(_missing_newline_marker)
+        hunks = (hunkrange, hunklines),
     elif not b:
+        without_newline = a[-1] != '\n'
         a = splitnewlines(a)
         l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
         if b is None:
@@ -301,24 +302,19 @@ 
         size = len(a)
         hunkrange = (1, size, 0, 0)
         hunklines = ["@@ -1,%d +0,0 @@\n" % size] + ["-" + e for e in a]
-        hunks = (hunkrange, checknonewline(hunklines)),
+        if without_newline:
+            hunklines[-1] += '\n'
+            hunklines.append(_missing_newline_marker)
+        hunks = (hunkrange, hunklines),
     else:
-        diffhunks = _unidiff(a, b, opts=opts)
-        try:
-            hunkrange, hunklines = next(diffhunks)
-        except StopIteration:
+        hunks = _unidiff(a, b, opts=opts)
+        if not next(hunks):
             return sentinel
 
         headerlines = [
             "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
             "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
         ]
-        def rewindhunks():
-            yield hunkrange, checknonewline(hunklines)
-            for hr, hl in diffhunks:
-                yield hr, checknonewline(hl)
-
-        hunks = rewindhunks()
 
     return headerlines, hunks
 
@@ -330,6 +326,8 @@ 
     form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
     of the hunk combining said header followed by line additions and
     deletions.
+
+    The hunks are prefixed with a bool.
     """
     l1 = splitnewlines(t1)
     l2 = splitnewlines(t2)
@@ -380,14 +378,35 @@ 
             + delta
             + [' ' + l1[x] for x in xrange(a2, aend)]
         )
+        # If either file ends without a newline and the last line of
+        # that file is part of a hunk, a marker is printed. If the
+        # last line of both files is identical and neither ends in
+        # a newline, print only one marker. That's the only case in
+        # which the hunk can end in a shared line without a newline.
+        skip = False
+        if t1[-1] != '\n' and astart + alen == len(l1) + 1:
+            for i in xrange(len(hunklines) - 1, -1, -1):
+                if hunklines[i][0] in ('-', ' '):
+                    if hunklines[i][0] == ' ':
+                        skip = True
+                    hunklines[i] += '\n'
+                    hunklines.insert(i + 1, _missing_newline_marker)
+                    break
+        if not skip and t2[-1] != '\n' and bstart + blen == len(l2) + 1:
+            for i in xrange(len(hunklines) - 1, -1, -1):
+                if hunklines[i][0] == '+':
+                    hunklines[i] += '\n'
+                    hunklines.insert(i + 1, _missing_newline_marker)
+                    break
         yield hunkrange, hunklines
 
     # bdiff.blocks gives us the matching sequences in the files.  The loop
     # below finds the spaces between those matching sequences and translates
     # them into diff output.
     #
     hunk = None
     ignoredlines = 0
+    has_hunks = False
     for s, stype in allblocks(t1, t2, opts, l1, l2):
         a1, a2, b1, b2 = s
         if stype != '!':
@@ -414,6 +433,9 @@ 
                 astart = hunk[1]
                 bstart = hunk[3]
             else:
+                if not has_hunks:
+                    has_hunks = True
+                    yield True
                 for x in yieldhunk(hunk):
                     yield x
         if prev:
@@ -430,8 +452,13 @@ 
         delta[len(delta):] = ['+' + x for x in new]
 
     if hunk:
+        if not has_hunks:
+            has_hunks = True
+            yield True
         for x in yieldhunk(hunk):
             yield x
+    elif not has_hunks:
+        yield False
 
 def b85diff(to, tn):
     '''print base85-encoded binary diff'''