Patchwork [2,of,2] dirstate: track otherparent files same as nonnormal

login
register
mail settings
Submitter Durham Goode
Date March 6, 2017, 12:24 a.m.
Message ID <f902e85491b50b1d2495.1488759852@dev111.prn1.facebook.com>
Download mbox | patch
Permalink /patch/18912/
State Superseded
Headers show

Comments

Durham Goode - March 6, 2017, 12:24 a.m.
# HG changeset patch
# User Durham Goode <durham@fb.com>
# Date 1488759650 28800
#      Sun Mar 05 16:20:50 2017 -0800
# Node ID f902e85491b50b1d2495fecb6b4ce2c48829ec12
# Parent  4a751a57ef0682dc3d7e46bdfa67a26b13cd9031
dirstate: track otherparent files same as nonnormal

Calling dirstate.setparents() is expensive in a large repo because it iterates
over every file in the dirstate. It does so to undo any merge state or
otherparent state files. Merge state files are already covered by
dirstate._nonnormalset, so we just need to track otherparent files in a similar
manner to avoid the full iteration here.

Fixing this shaves 20-25% off histedit in large repos.

I tested this by adding temporary debug logic to verify that the old files
processed in the loop matched the new files processed in the loop and running
the test suite.
Durham Goode - March 6, 2017, 12:51 a.m.
On 3/5/17 4:24 PM, Durham Goode wrote:
> # HG changeset patch
> # User Durham Goode <durham@fb.com>
> # Date 1488759650 28800
> #      Sun Mar 05 16:20:50 2017 -0800
> # Node ID f902e85491b50b1d2495fecb6b4ce2c48829ec12
> # Parent  4a751a57ef0682dc3d7e46bdfa67a26b13cd9031
> dirstate: track otherparent files same as nonnormal
>
> Calling dirstate.setparents() is expensive in a large repo because it iterates
> over every file in the dirstate. It does so to undo any merge state or
> otherparent state files. Merge state files are already covered by
> dirstate._nonnormalset, so we just need to track otherparent files in a similar
> manner to avoid the full iteration here.
>
> Fixing this shaves 20-25% off histedit in large repos.
>
> I tested this by adding temporary debug logic to verify that the old files
> processed in the loop matched the new files processed in the loop and running
> the test suite.

Woops, sent this out before doing a final amend.  V2 sent

Patch

diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -57,8 +57,14 @@  def nonnormalentries(dmap):
     try:
         return parsers.nonnormalentries(dmap)
     except AttributeError:
-        return set(fname for fname, e in dmap.iteritems()
-                   if e[0] != 'n' or e[3] == -1)
+        nonnorm = set()
+        otherparent = set()
+        for fname, e in dmap.iteritems():
+            if e[0] != 'n' or e[3] == -1:
+                nonnorm.add(fname)
+            if e[0] == 'n' and e[2] == -2:
+                otherparent.add(fname)
+        return nonnorm, otherparent
 
 class dirstate(object):
 
@@ -131,7 +137,15 @@  class dirstate(object):
 
     @propertycache
     def _nonnormalset(self):
-        return nonnormalentries(self._map)
+        nonnorm, otherparents = nonnormalentries(self._map)
+        self._otherparentset = otherparents
+        return nonnorm
+
+    @propertycache
+    def _otherparentset(self):
+        nonnorm, otherparents = nonnormalentries(self._map)
+        self._nonnormalset = nonnorm
+        return otherparents
 
     @propertycache
     def _filefoldmap(self):
@@ -341,7 +355,9 @@  class dirstate(object):
         self._pl = p1, p2
         copies = {}
         if oldp2 != nullid and p2 == nullid:
-            for f, s in self._map.iteritems():
+            candidatefiles = self._nonnormalset.union(self._otherparentset)
+            for f in candidatefiles:
+                s = self._map[f]
                 # Discard 'm' markers when moving away from a merge state
                 if s[0] == 'm':
                     if f in self._copymap:
@@ -427,7 +443,8 @@  class dirstate(object):
 
     def invalidate(self):
         for a in ("_map", "_copymap", "_filefoldmap", "_dirfoldmap", "_branch",
-                  "_pl", "_dirs", "_ignore", "_nonnormalset"):
+                  "_pl", "_dirs", "_ignore", "_nonnormalset",
+                  "_otherparentset"):
             if a in self.__dict__:
                 delattr(self, a)
         self._lastnormaltime = 0
@@ -486,6 +503,8 @@  class dirstate(object):
         self._map[f] = dirstatetuple(state, mode, size, mtime)
         if state != 'n' or mtime == -1:
             self._nonnormalset.add(f)
+        if size == -2:
+            self._otherparentset.add(f)
 
     def normal(self, f):
         '''Mark a file normal and clean.'''
@@ -560,6 +579,7 @@  class dirstate(object):
                 size = -1
             elif entry[0] == 'n' and entry[2] == -2: # other parent
                 size = -2
+                self._otherparentset.add(f)
         self._map[f] = dirstatetuple('r', 0, size, 0)
         self._nonnormalset.add(f)
         if size == 0 and f in self._copymap:
@@ -758,7 +778,7 @@  class dirstate(object):
                     break
 
         st.write(parsers.pack_dirstate(self._map, self._copymap, self._pl, now))
-        self._nonnormalset = nonnormalentries(self._map)
+        self._nonnormalset, self._otherparentset = nonnormalentries(self._map)
         st.close()
         self._lastnormaltime = 0
         self._dirty = self._dirtypl = False
diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -564,7 +564,8 @@  quit:
 */
 static PyObject *nonnormalentries(PyObject *self, PyObject *args)
 {
-	PyObject *dmap, *nonnset = NULL, *fname, *v;
+	PyObject *dmap, *fname, *v;
+	PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
 	Py_ssize_t pos;
 
 	if (!PyArg_ParseTuple(args, "O!:nonnormalentries",
@@ -575,6 +576,10 @@  static PyObject *nonnormalentries(PyObje
 	if (nonnset == NULL)
 		goto bail;
 
+	otherpset = PySet_New(NULL);
+	if (otherpset == NULL)
+		goto bail;
+
 	pos = 0;
 	while (PyDict_Next(dmap, &pos, &fname, &v)) {
 		dirstateTupleObject *t;
@@ -585,15 +590,26 @@  static PyObject *nonnormalentries(PyObje
 		}
 		t = (dirstateTupleObject *)v;
 
+		if (t->state == 'n' && t->size == -2) {
+			if (PySet_Add(otherpset, fname) == -1) {
+				goto bail;
+			}
+		}
+
 		if (t->state == 'n' && t->mtime != -1)
 			continue;
 		if (PySet_Add(nonnset, fname) == -1)
 			goto bail;
 	}
 
-	return nonnset;
+	result = Py_BuildValue("(OO)", nonnset, otherpset);
+	if (result == NULL)
+		goto bail;
+	return result;
 bail:
 	Py_XDECREF(nonnset);
+	Py_XDECREF(otherpset);
+	Py_XDECREF(result);
 	return NULL;
 }