Patchwork [1,of,3,V2] annotate: do not construct attr.s object per line while computing history

login
register
mail settings
Submitter Yuya Nishihara
Date March 18, 2018, 3:45 a.m.
Message ID <2d8154d8b27e0452aee5.1521344732@mimosa>
Download mbox | patch
Permalink /patch/29589/
State Accepted
Headers show

Comments

Yuya Nishihara - March 18, 2018, 3:45 a.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1520855110 -32400
#      Mon Mar 12 20:45:10 2018 +0900
# Node ID 2d8154d8b27e0452aee52f45d48370c7ca9f837b
# Parent  66e64681e0a8ccbf3576d99ceb51f7259e352689
annotate: do not construct attr.s object per line while computing history

Unfortunately, good abstraction has a cost. It's way slower to construct
an annotateline() object than creating a plain tuple or a list. This patch
changes the internal data structure from row-based to columnar, so the
decorate() function can be instant (i.e. no Python in hot loop.)

For code readability, the outermost tuple is switched to an attr.s object
instead.

  (original, row-based attr.s)
  $ hg annot mercurial/commands.py --time > /dev/null
  time: real 11.470 secs (user 11.400+0.000 sys 0.070+0.000)
  $ hg annot mercurial/commands.py --time --line-number > /dev/null
  time: real 39.590 secs (user 39.500+0.000 sys 0.080+0.000)

  (this patch, columnar)
  $ hg annot mercurial/commands.py --time > /dev/null
  time: real 11.780 secs (user 11.710+0.000 sys 0.070+0.000)
  $ hg annot mercurial/commands.py --time --line-number > /dev/null
  time: real 12.240 secs (user 12.170+0.000 sys 0.090+0.000)

  (cf. 4.3.3, row-based tuple)
  $ hg annot mercurial/commands.py --time --line-number > /dev/null
  time: real 19.540 secs (user 19.460+0.000 sys 0.080+0.000)

Patch

diff --git a/mercurial/dagop.py b/mercurial/dagop.py
--- a/mercurial/dagop.py
+++ b/mercurial/dagop.py
@@ -369,6 +369,15 @@  class annotateline(object):
     # Whether this annotation was the result of a skip-annotate.
     skip = attr.ib(default=False)
 
+@attr.s(slots=True, frozen=True)
+class _annotatedfile(object):
+    # list indexed by lineno - 1
+    fctxs = attr.ib()
+    linenos = attr.ib()
+    skips = attr.ib()
+    # full file content
+    text = attr.ib()
+
 def _countlines(text):
     if text.endswith("\n"):
         return text.count("\n")
@@ -385,7 +394,7 @@  def _annotatepair(parents, childfctx, ch
 
     See test-annotate.py for unit tests.
     '''
-    pblocks = [(parent, mdiff.allblocks(parent[1], child[1], opts=diffopts))
+    pblocks = [(parent, mdiff.allblocks(parent.text, child.text, opts=diffopts))
                for parent in parents]
 
     if skipchild:
@@ -398,7 +407,9 @@  def _annotatepair(parents, childfctx, ch
             # Changed blocks ('!') or blocks made only of blank lines ('~')
             # belong to the child.
             if t == '=':
-                child[0][b1:b2] = parent[0][a1:a2]
+                child.fctxs[b1:b2] = parent.fctxs[a1:a2]
+                child.linenos[b1:b2] = parent.linenos[a1:a2]
+                child.skips[b1:b2] = parent.skips[a1:a2]
 
     if skipchild:
         # Now try and match up anything that couldn't be matched,
@@ -419,9 +430,11 @@  def _annotatepair(parents, childfctx, ch
             for (a1, a2, b1, b2), _t in blocks:
                 if a2 - a1 >= b2 - b1:
                     for bk in xrange(b1, b2):
-                        if child[0][bk].fctx == childfctx:
+                        if child.fctxs[bk] == childfctx:
                             ak = min(a1 + (bk - b1), a2 - 1)
-                            child[0][bk] = attr.evolve(parent[0][ak], skip=True)
+                            child.fctxs[bk] = parent.fctxs[ak]
+                            child.linenos[bk] = parent.linenos[ak]
+                            child.skips[bk] = True
                 else:
                     remaining[idx][1].append((a1, a2, b1, b2))
 
@@ -430,9 +443,11 @@  def _annotatepair(parents, childfctx, ch
         for parent, blocks in remaining:
             for a1, a2, b1, b2 in blocks:
                 for bk in xrange(b1, b2):
-                    if child[0][bk].fctx == childfctx:
+                    if child.fctxs[bk] == childfctx:
                         ak = min(a1 + (bk - b1), a2 - 1)
-                        child[0][bk] = attr.evolve(parent[0][ak], skip=True)
+                        child.fctxs[bk] = parent.fctxs[ak]
+                        child.linenos[bk] = parent.linenos[ak]
+                        child.skips[bk] = True
     return child
 
 def annotate(base, parents, linenumber=False, skiprevs=None, diffopts=None):
@@ -443,11 +458,13 @@  def annotate(base, parents, linenumber=F
 
     if linenumber:
         def decorate(text, fctx):
-            return ([annotateline(fctx=fctx, lineno=i)
-                     for i in xrange(1, _countlines(text) + 1)], text)
+            n = _countlines(text)
+            linenos = pycompat.rangelist(1, n + 1)
+            return _annotatedfile([fctx] * n, linenos, [False] * n, text)
     else:
         def decorate(text, fctx):
-            return ([annotateline(fctx=fctx)] * _countlines(text), text)
+            n = _countlines(text)
+            return _annotatedfile([fctx] * n, [False] * n, [False] * n, text)
 
     # This algorithm would prefer to be recursive, but Python is a
     # bit recursion-hostile. Instead we do an iterative
@@ -501,8 +518,10 @@  def annotate(base, parents, linenumber=F
             hist[f] = curr
             del pcache[f]
 
-    lineattrs, text = hist[base]
-    return pycompat.ziplist(lineattrs, mdiff.splitnewlines(text))
+    a = hist[base]
+    return [(annotateline(fctx, lineno, skip), line)
+            for fctx, lineno, skip, line
+            in zip(a.fctxs, a.linenos, a.skips, mdiff.splitnewlines(a.text))]
 
 def toposort(revs, parentsfunc, firstbranch=()):
     """Yield revisions from heads to roots one (topo) branch at a time.
diff --git a/mercurial/pycompat.py b/mercurial/pycompat.py
--- a/mercurial/pycompat.py
+++ b/mercurial/pycompat.py
@@ -71,6 +71,9 @@  if ispy3:
     def maplist(*args):
         return list(map(*args))
 
+    def rangelist(*args):
+        return list(range(*args))
+
     def ziplist(*args):
         return list(zip(*args))
 
@@ -348,6 +351,7 @@  else:
     bytesio = cStringIO.StringIO
     stringio = bytesio
     maplist = map
+    rangelist = range
     ziplist = zip
     rawinput = raw_input
     getargspec = inspect.getargspec
diff --git a/tests/test-annotate.py b/tests/test-annotate.py
--- a/tests/test-annotate.py
+++ b/tests/test-annotate.py
@@ -5,12 +5,18 @@  import unittest
 
 from mercurial import (
     mdiff,
+    pycompat,
 )
 from mercurial.dagop import (
     annotateline,
+    _annotatedfile,
     _annotatepair,
 )
 
+def tr(a):
+    return [annotateline(fctx, lineno, skip)
+            for fctx, lineno, skip in zip(a.fctxs, a.linenos, a.skips)]
+
 class AnnotateTests(unittest.TestCase):
     """Unit tests for annotate code."""
 
@@ -26,16 +32,16 @@  class AnnotateTests(unittest.TestCase):
         diffopts = mdiff.diffopts()
 
         def decorate(text, fctx):
-            return ([annotateline(fctx=fctx, lineno=i)
-                     for i in range(1, text.count(b'\n') + 1)],
-                    text)
+            n = text.count(b'\n')
+            linenos = pycompat.rangelist(1, n + 1)
+            return _annotatedfile([fctx] * n, linenos, [False] * n, text)
 
         # Basic usage
 
         oldann = decorate(olddata, oldfctx)
         p1ann = decorate(p1data, p1fctx)
         p1ann = _annotatepair([oldann], p1fctx, p1ann, False, diffopts)
-        self.assertEqual(p1ann[0], [
+        self.assertEqual(tr(p1ann), [
             annotateline(b'old', 1),
             annotateline(b'old', 2),
             annotateline(b'p1', 3),
@@ -43,7 +49,7 @@  class AnnotateTests(unittest.TestCase):
 
         p2ann = decorate(p2data, p2fctx)
         p2ann = _annotatepair([oldann], p2fctx, p2ann, False, diffopts)
-        self.assertEqual(p2ann[0], [
+        self.assertEqual(tr(p2ann), [
             annotateline(b'old', 1),
             annotateline(b'p2', 2),
             annotateline(b'p2', 3),
@@ -54,7 +60,7 @@  class AnnotateTests(unittest.TestCase):
         childann = decorate(childdata, childfctx)
         childann = _annotatepair([p1ann, p2ann], childfctx, childann, False,
                                  diffopts)
-        self.assertEqual(childann[0], [
+        self.assertEqual(tr(childann), [
             annotateline(b'old', 1),
             annotateline(b'c', 2),
             annotateline(b'p2', 2),
@@ -65,7 +71,7 @@  class AnnotateTests(unittest.TestCase):
         childann = decorate(childdata, childfctx)
         childann = _annotatepair([p2ann, p1ann], childfctx, childann, False,
                                  diffopts)
-        self.assertEqual(childann[0], [
+        self.assertEqual(tr(childann), [
             annotateline(b'old', 1),
             annotateline(b'c', 2),
             annotateline(b'p1', 3),
@@ -78,7 +84,7 @@  class AnnotateTests(unittest.TestCase):
         childann = decorate(childdata, childfctx)
         childann = _annotatepair([p1ann, p2ann], childfctx, childann, True,
                                  diffopts)
-        self.assertEqual(childann[0], [
+        self.assertEqual(tr(childann), [
             annotateline(b'old', 1),
             annotateline(b'old', 2, True),
             # note that this line was carried over from earlier so it is *not*
@@ -91,7 +97,7 @@  class AnnotateTests(unittest.TestCase):
         childann = decorate(childdata, childfctx)
         childann = _annotatepair([p2ann, p1ann], childfctx, childann, True,
                                  diffopts)
-        self.assertEqual(childann[0], [
+        self.assertEqual(tr(childann), [
             annotateline(b'old', 1),
             annotateline(b'old', 2, True),
             annotateline(b'p1', 3),