Patchwork [3,of,4] annotate: do not construct attr.s object per line while computing history

login
register
mail settings
Submitter Yuya Nishihara
Date March 13, 2018, 1:55 p.m.
Message ID <1ce0d903cfa6e8db0112.1520949311@mimosa>
Download mbox | patch
Permalink /patch/29438/
State Accepted
Headers show

Comments

Yuya Nishihara - March 13, 2018, 1:55 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1520855110 -32400
#      Mon Mar 12 20:45:10 2018 +0900
# Node ID 1ce0d903cfa6e8db0112f86cc346751651db1f80
# Parent  95af54712a25c298d5ee63c385f3d498250c1624
annotate: do not construct attr.s object per line while computing history

Unfortunately, it's way slower to construct an annotateline() object than
creating a plain tuple or a list. This patch changes the internal data
structure from row-based to columnar, so the decorate() function can be
instant.

  (original)
  $ hg annot mercurial/commands.py --time > /dev/null
  time: real 11.470 secs (user 11.400+0.000 sys 0.070+0.000)
  $ hg annot mercurial/commands.py --time --line-number > /dev/null
  time: real 39.590 secs (user 39.500+0.000 sys 0.080+0.000)

  (this patch)
  $ hg annot mercurial/commands.py --time > /dev/null
  time: real 11.780 secs (user 11.710+0.000 sys 0.070+0.000)
  $ hg annot mercurial/commands.py --time --line-number > /dev/null
  time: real 12.240 secs (user 12.170+0.000 sys 0.090+0.000)

Patch

diff --git a/mercurial/dagop.py b/mercurial/dagop.py
--- a/mercurial/dagop.py
+++ b/mercurial/dagop.py
@@ -365,10 +365,19 @@  def blockdescendants(fctx, fromline, tol
 @attr.s(slots=True, frozen=True)
 class annotateline(object):
     fctx = attr.ib()
-    lineno = attr.ib(default=False)
+    lineno = attr.ib()
     # Whether this annotation was the result of a skip-annotate.
     skip = attr.ib(default=False)
 
+@attr.s(slots=True, frozen=True)
+class _annotatedfile(object):
+    # list indexed by lineno - 1
+    fctxs = attr.ib()
+    linenos = attr.ib()
+    skips = attr.ib()
+    # full file content
+    text = attr.ib()
+
 def _countlines(text):
     if text.endswith("\n"):
         return text.count("\n")
@@ -385,7 +394,7 @@  def _annotatepair(parents, childfctx, ch
 
     See test-annotate.py for unit tests.
     '''
-    pblocks = [(parent, mdiff.allblocks(parent[1], child[1], opts=diffopts))
+    pblocks = [(parent, mdiff.allblocks(parent.text, child.text, opts=diffopts))
                for parent in parents]
 
     if skipchild:
@@ -398,7 +407,9 @@  def _annotatepair(parents, childfctx, ch
             # Changed blocks ('!') or blocks made only of blank lines ('~')
             # belong to the child.
             if t == '=':
-                child[0][b1:b2] = parent[0][a1:a2]
+                child.fctxs[b1:b2] = parent.fctxs[a1:a2]
+                child.linenos[b1:b2] = parent.linenos[a1:a2]
+                child.skips[b1:b2] = parent.skips[a1:a2]
 
     if skipchild:
         # Now try and match up anything that couldn't be matched,
@@ -419,9 +430,11 @@  def _annotatepair(parents, childfctx, ch
             for (a1, a2, b1, b2), _t in blocks:
                 if a2 - a1 >= b2 - b1:
                     for bk in xrange(b1, b2):
-                        if child[0][bk].fctx == childfctx:
+                        if child.fctxs[bk] == childfctx:
                             ak = min(a1 + (bk - b1), a2 - 1)
-                            child[0][bk] = attr.evolve(parent[0][ak], skip=True)
+                            child.fctxs[bk] = parent.fctxs[ak]
+                            child.linenos[bk] = parent.linenos[ak]
+                            child.skips[bk] = True
                 else:
                     remaining[idx][1].append((a1, a2, b1, b2))
 
@@ -430,9 +443,11 @@  def _annotatepair(parents, childfctx, ch
         for parent, blocks in remaining:
             for a1, a2, b1, b2 in blocks:
                 for bk in xrange(b1, b2):
-                    if child[0][bk].fctx == childfctx:
+                    if child.fctxs[bk] == childfctx:
                         ak = min(a1 + (bk - b1), a2 - 1)
-                        child[0][bk] = attr.evolve(parent[0][ak], skip=True)
+                        child.fctxs[bk] = parent.fctxs[ak]
+                        child.linenos[bk] = parent.linenos[ak]
+                        child.skips[bk] = True
     return child
 
 def annotate(base, parents, linenumber=False, skiprevs=None, diffopts=None):
@@ -443,11 +458,13 @@  def annotate(base, parents, linenumber=F
 
     if linenumber:
         def decorate(text, fctx):
-            return ([annotateline(fctx=fctx, lineno=i)
-                     for i in xrange(1, _countlines(text) + 1)], text)
+            n = _countlines(text)
+            linenos = pycompat.rangelist(1, n + 1)
+            return _annotatedfile([fctx] * n, linenos, [False] * n, text)
     else:
         def decorate(text, fctx):
-            return ([annotateline(fctx=fctx)] * _countlines(text), text)
+            n = _countlines(text)
+            return _annotatedfile([fctx] * n, [False] * n, [False] * n, text)
 
     # This algorithm would prefer to be recursive, but Python is a
     # bit recursion-hostile. Instead we do an iterative
@@ -501,8 +518,10 @@  def annotate(base, parents, linenumber=F
             hist[f] = curr
             del pcache[f]
 
-    lineattrs, text = hist[base]
-    return pycompat.ziplist(lineattrs, mdiff.splitnewlines(text))
+    a = hist[base]
+    return [(annotateline(fctx, lineno, skip), line)
+            for fctx, lineno, skip, line
+            in zip(a.fctxs, a.linenos, a.skips, mdiff.splitnewlines(a.text))]
 
 def toposort(revs, parentsfunc, firstbranch=()):
     """Yield revisions from heads to roots one (topo) branch at a time.
diff --git a/mercurial/pycompat.py b/mercurial/pycompat.py
--- a/mercurial/pycompat.py
+++ b/mercurial/pycompat.py
@@ -69,6 +69,9 @@  if ispy3:
     def maplist(*args):
         return list(map(*args))
 
+    def rangelist(*args):
+        return list(range(*args))
+
     def ziplist(*args):
         return list(zip(*args))
 
@@ -345,6 +348,7 @@  else:
     shlexsplit = shlex.split
     stringio = cStringIO.StringIO
     maplist = map
+    rangelist = range
     ziplist = zip
     rawinput = raw_input
     getargspec = inspect.getargspec
diff --git a/tests/test-annotate.py b/tests/test-annotate.py
--- a/tests/test-annotate.py
+++ b/tests/test-annotate.py
@@ -5,12 +5,18 @@  import unittest
 
 from mercurial import (
     mdiff,
+    pycompat,
 )
 from mercurial.dagop import (
     annotateline,
+    _annotatedfile,
     _annotatepair,
 )
 
+def tr(a):
+    return [annotateline(fctx, lineno, skip)
+            for fctx, lineno, skip in zip(a.fctxs, a.linenos, a.skips)]
+
 class AnnotateTests(unittest.TestCase):
     """Unit tests for annotate code."""
 
@@ -26,16 +32,16 @@  class AnnotateTests(unittest.TestCase):
         diffopts = mdiff.diffopts()
 
         def decorate(text, fctx):
-            return ([annotateline(fctx=fctx, lineno=i)
-                     for i in range(1, text.count(b'\n') + 1)],
-                    text)
+            n = text.count(b'\n')
+            linenos = pycompat.rangelist(1, n + 1)
+            return _annotatedfile([fctx] * n, linenos, [False] * n, text)
 
         # Basic usage
 
         oldann = decorate(olddata, oldfctx)
         p1ann = decorate(p1data, p1fctx)
         p1ann = _annotatepair([oldann], p1fctx, p1ann, False, diffopts)
-        self.assertEqual(p1ann[0], [
+        self.assertEqual(tr(p1ann), [
             annotateline(b'old', 1),
             annotateline(b'old', 2),
             annotateline(b'p1', 3),
@@ -43,7 +49,7 @@  class AnnotateTests(unittest.TestCase):
 
         p2ann = decorate(p2data, p2fctx)
         p2ann = _annotatepair([oldann], p2fctx, p2ann, False, diffopts)
-        self.assertEqual(p2ann[0], [
+        self.assertEqual(tr(p2ann), [
             annotateline(b'old', 1),
             annotateline(b'p2', 2),
             annotateline(b'p2', 3),
@@ -54,7 +60,7 @@  class AnnotateTests(unittest.TestCase):
         childann = decorate(childdata, childfctx)
         childann = _annotatepair([p1ann, p2ann], childfctx, childann, False,
                                  diffopts)
-        self.assertEqual(childann[0], [
+        self.assertEqual(tr(childann), [
             annotateline(b'old', 1),
             annotateline(b'c', 2),
             annotateline(b'p2', 2),
@@ -65,7 +71,7 @@  class AnnotateTests(unittest.TestCase):
         childann = decorate(childdata, childfctx)
         childann = _annotatepair([p2ann, p1ann], childfctx, childann, False,
                                  diffopts)
-        self.assertEqual(childann[0], [
+        self.assertEqual(tr(childann), [
             annotateline(b'old', 1),
             annotateline(b'c', 2),
             annotateline(b'p1', 3),
@@ -78,7 +84,7 @@  class AnnotateTests(unittest.TestCase):
         childann = decorate(childdata, childfctx)
         childann = _annotatepair([p1ann, p2ann], childfctx, childann, True,
                                  diffopts)
-        self.assertEqual(childann[0], [
+        self.assertEqual(tr(childann), [
             annotateline(b'old', 1),
             annotateline(b'old', 2, True),
             # note that this line was carried over from earlier so it is *not*
@@ -91,7 +97,7 @@  class AnnotateTests(unittest.TestCase):
         childann = decorate(childdata, childfctx)
         childann = _annotatepair([p2ann, p1ann], childfctx, childann, True,
                                  diffopts)
-        self.assertEqual(childann[0], [
+        self.assertEqual(tr(childann), [
             annotateline(b'old', 1),
             annotateline(b'old', 2, True),
             annotateline(b'p1', 3),