Patchwork [2,of,2,STABLE] _adjustlinkrev: reuse ancestors set during rename detection (issue4514)

login
register
mail settings
Submitter Pierre-Yves David
Date Jan. 30, 2015, 5:20 p.m.
Message ID <9e07d1f5d85ad76fa4a6.1422638434@marginatus.alto.octopoid.net>
Download mbox | patch
Permalink /patch/7580/
State Accepted
Commit c1ce5442453ff6fe63e7da0f8804db1845ecaf98
Headers show

Comments

Pierre-Yves David - Jan. 30, 2015, 5:20 p.m.
# HG changeset patch
# User Pierre-Yves David <pierre-yves.david@fb.com>
# Date 1422633748 0
#      Fri Jan 30 16:02:28 2015 +0000
# Branch stable
# Node ID 9e07d1f5d85ad76fa4a6b0191a54b9bcdac1442c
# Parent  b9d18d103d7aa91ac999630ec582300a5cb9a596
_adjustlinkrev: reuse ancestors set during rename detection (issue4514)

The new linkrev adjustment mechanism makes rename detection very slow, because
each file rewalks the ancestors DAG. To mitigate the issue in Mercurial 3.3, we
introduce a simplistic way to share the ancestors computation for the linkrev
validation phase.

We can reuse the ancestors in that case because we do not care about
sub-branching in the ancestors graph.

The cached set will be used to check if the linkrev is valid in the search
context. This is the vast majority of the ancestors usage during copies search
since the uncached one will only be used when linkrev is invalid, which is
hopefully rare.

Patch

diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -764,14 +764,21 @@  class basefilectx(object):
         cl = repo.unfiltered().changelog
         ma = repo.manifest
         # fetch the linkrev
         fr = filelog.rev(fnode)
         lkr = filelog.linkrev(fr)
+        # hack to reuse ancestor computation when searching for renames
+        memberanc = getattr(self, '_ancestrycontext', None)
+        iteranc = None
+        if memberanc is None:
+            memberanc = iteranc = cl.ancestors([srcrev], lkr,
+                                               inclusive=inclusive)
         # check if this linkrev is an ancestor of srcrev
-        anc = cl.ancestors([srcrev], lkr, inclusive=inclusive)
-        if lkr not in anc:
-            for a in anc:
+        if lkr not in memberanc:
+            if iteranc is None:
+                iteranc = cl.ancestors([srcrev], lkr, inclusive=inclusive)
+            for a in iteranc:
                 ac = cl.read(a) # get changeset data (we avoid object creation)
                 if path in ac[3]: # checking the 'files' field.
                     # The file has been touched, check if the content is
                     # similar to the one we search for.
                     if fnode == ma.readfast(ac[0]).get(path):
@@ -824,10 +831,12 @@  class basefilectx(object):
                 # fed), ensure the created filectx is associated with a
                 # changeset that is an ancestor of self.changectx.
                 rev = self._adjustlinkrev(path, l, fnode, self.rev())
                 fctx = filectx(self._repo, path, fileid=fnode, filelog=l,
                                changeid=rev)
+                fctx._ancestrycontext = getattr(self, '_ancestrycontext', None)
+
             else:
                 fctx = filectx(self._repo, path, fileid=fnode, filelog=l)
             ret.append(fctx)
         return ret
 
diff --git a/mercurial/copies.py b/mercurial/copies.py
--- a/mercurial/copies.py
+++ b/mercurial/copies.py
@@ -129,10 +129,11 @@  def _chain(src, dst, a, b):
 def _tracefile(fctx, am, limit=-1):
     '''return file context that is the ancestor of fctx present in ancestor
     manifest am, stopping after the first ancestor lower than limit'''
 
     for f in fctx.ancestors():
+        assert f._ancestrycontext is not None
         if am.get(f.path(), None) == f.filenode():
             return f
         if f.rev() < limit:
             return None
 
@@ -168,12 +169,15 @@  def _forwardcopies(a, b):
     # this means we can miss a case like 'hg rm b; hg cp a b'
     cm = {}
     missing = set(b.manifest().iterkeys())
     missing.difference_update(a.manifest().iterkeys())
 
+    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
     for f in missing:
-        ofctx = _tracefile(b[f], am, limit)
+        fctx = b[f]
+        fctx._ancestrycontext = ancestrycontext
+        ofctx = _tracefile(fctx, am, limit)
         if ofctx:
             cm[f] = ofctx.path()
 
     # combine copies from dirstate if necessary
     if w is not None: