Patchwork D8698: phases: sparsify phaseroots and phasesets

login
register
mail settings
Submitter phabricator
Date July 7, 2020, 10:39 p.m.
Message ID <differential-rev-PHID-DREV-leysjmmxhj5a7nfg3vqj-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/46650/
State Superseded
Headers show

Comments

phabricator - July 7, 2020, 10:39 p.m.
joerg.sonnenberger created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  As final step of dealing with the holes in the phase numbers, make
  phaseroots and phasesets both dictionaries indexed by the phase number.
  Further adjust the interface of the C module by pushing the node to
  revision mapping down as it is cheaper on the C side to deal with
  revision numbers.
  
  Overall, the patch series improves a no-change "hg up" for my NetBSD test
  repository from 4.7s to 1.3s.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D8698

AFFECTED FILES
  mercurial/cext/parsers.c
  mercurial/cext/revlog.c
  mercurial/phases.py
  mercurial/policy.py
  tests/test-parseindex.t

CHANGE DETAILS




To: joerg.sonnenberger, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/tests/test-parseindex.t b/tests/test-parseindex.t
--- a/tests/test-parseindex.t
+++ b/tests/test-parseindex.t
@@ -185,7 +185,7 @@ 
   > ops = [
   >     ('reachableroots',
   >      lambda: cl.index.reachableroots2(0, [1], [0], False)),
-  >     ('compute_phases_map_sets', lambda: cl.computephases([[0], []])),
+  >     ('compute_phases_map_sets', lambda: cl.computephases({1: {cl.node(0)}})),
   >     ('index_headrevs', lambda: cl.headrevs()),
   >     ('find_gca_candidates', lambda: cl.commonancestorsheads(n0, n1)),
   >     ('find_deepest', lambda: cl.ancestor(n0, n1)),
diff --git a/mercurial/policy.py b/mercurial/policy.py
--- a/mercurial/policy.py
+++ b/mercurial/policy.py
@@ -80,7 +80,7 @@ 
     ('cext', 'bdiff'): 3,
     ('cext', 'mpatch'): 1,
     ('cext', 'osutil'): 4,
-    ('cext', 'parsers'): 16,
+    ('cext', 'parsers'): 17,
 }
 
 # map import request to other package or module
diff --git a/mercurial/phases.py b/mercurial/phases.py
--- a/mercurial/phases.py
+++ b/mercurial/phases.py
@@ -170,7 +170,7 @@ 
     """
     repo = repo.unfiltered()
     dirty = False
-    roots = [set() for i in range(max(allphases) + 1)]
+    roots = {i: set() for i in allphases}
     try:
         f, pending = txnutil.trypending(repo.root, repo.svfs, b'phaseroots')
         try:
@@ -333,7 +333,11 @@ 
         if len(cl) >= self._loadedrevslen:
             self.invalidate()
             self.loadphaserevs(repo)
-        return any(self.phaseroots[1:])
+        return any(
+            revs
+            for phase, revs in pycompat.iteritems(self.phaseroots)
+            if phase != public
+        )
 
     def nonpublicphaseroots(self, repo):
         """returns the roots of all non-public phases
@@ -346,7 +350,13 @@ 
         if len(cl) >= self._loadedrevslen:
             self.invalidate()
             self.loadphaserevs(repo)
-        return set().union(*[roots for roots in self.phaseroots[1:] if roots])
+        return set().union(
+            *[
+                revs
+                for phase, revs in pycompat.iteritems(self.phaseroots)
+                if phase != public
+            ]
+        )
 
     def getrevset(self, repo, phases, subset=None):
         """return a smartset for the given phases"""
@@ -405,7 +415,7 @@ 
         # Shallow copy meant to ensure isolation in
         # advance/retractboundary(), nothing more.
         ph = self.__class__(None, None, _load=False)
-        ph.phaseroots = self.phaseroots[:]
+        ph.phaseroots = self.phaseroots.copy()
         ph.dirty = self.dirty
         ph.opener = self.opener
         ph._loadedrevslen = self._loadedrevslen
@@ -425,21 +435,12 @@ 
 
     def _getphaserevsnative(self, repo):
         repo = repo.unfiltered()
-        nativeroots = []
-        for phase in trackedphases:
-            nativeroots.append(
-                pycompat.maplist(repo.changelog.rev, self.phaseroots[phase])
-            )
-        revslen, phasesets = repo.changelog.computephases(nativeroots)
-        phasesets2 = [set() for phase in range(max(allphases) + 1)]
-        for phase, phaseset in zip(allphases, phasesets):
-            phasesets2[phase] = phaseset
-        return revslen, phasesets2
+        return repo.changelog.computephases(self.phaseroots)
 
     def _computephaserevspure(self, repo):
         repo = repo.unfiltered()
         cl = repo.changelog
-        self._phasesets = [set() for phase in range(max(allphases) + 1)]
+        self._phasesets = {phase: set() for phase in allphases}
         lowerroots = set()
         for phase in reversed(trackedphases):
             roots = pycompat.maplist(cl.rev, self.phaseroots[phase])
@@ -493,7 +494,7 @@ 
             f.close()
 
     def _write(self, fp):
-        for phase, roots in enumerate(self.phaseroots):
+        for phase, roots in pycompat.iteritems(self.phaseroots):
             for h in sorted(roots):
                 fp.write(b'%i %s\n' % (phase, hex(h)))
         self.dirty = False
@@ -575,7 +576,11 @@ 
         return changes
 
     def retractboundary(self, repo, tr, targetphase, nodes):
-        oldroots = self.phaseroots[: targetphase + 1]
+        oldroots = {
+            phase: revs
+            for phase, revs in pycompat.iteritems(self.phaseroots)
+            if phase <= targetphase
+        }
         if tr is None:
             phasetracking = None
         else:
@@ -594,7 +599,7 @@ 
             # find the phase of the affected revision
             for phase in pycompat.xrange(targetphase, -1, -1):
                 if phase:
-                    roots = oldroots[phase]
+                    roots = oldroots.get(phase, [])
                     revs = set(repo.revs(b'%ln::%ld', roots, affected))
                     affected -= revs
                 else:  # public phase
@@ -648,7 +653,7 @@ 
         """
         filtered = False
         has_node = repo.changelog.index.has_node  # to filter unknown nodes
-        for phase, nodes in enumerate(self.phaseroots):
+        for phase, nodes in pycompat.iteritems(self.phaseroots):
             missing = sorted(node for node in nodes if not has_node(node))
             if missing:
                 for mnode in missing:
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -109,6 +109,9 @@ 
 
 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
 
+static int index_find_node(indexObject *self, const char *node,
+                           Py_ssize_t nodelen);
+
 #if LONG_MAX == 0x7fffffffL
 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
 #else
@@ -577,34 +580,6 @@ 
 	}
 }
 
-static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
-                                    Py_ssize_t marker, char *phases)
-{
-	PyObject *iter = NULL;
-	PyObject *iter_item = NULL;
-	Py_ssize_t min_idx = index_length(self) + 2;
-	long iter_item_long;
-
-	if (PyList_GET_SIZE(list) != 0) {
-		iter = PyObject_GetIter(list);
-		if (iter == NULL)
-			return -2;
-		while ((iter_item = PyIter_Next(iter))) {
-			if (!pylong_to_long(iter_item, &iter_item_long)) {
-				Py_DECREF(iter_item);
-				return -2;
-			}
-			Py_DECREF(iter_item);
-			if (iter_item_long < min_idx)
-				min_idx = iter_item_long;
-			phases[iter_item_long] = (char)marker;
-		}
-		Py_DECREF(iter);
-	}
-
-	return min_idx;
-}
-
 static inline void set_phase_from_parents(char *phases, int parent_1,
                                           int parent_2, Py_ssize_t i)
 {
@@ -773,99 +748,165 @@ 
 	return NULL;
 }
 
+static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
+                             char phase)
+{
+	Py_ssize_t len = index_length(self);
+	PyObject *iter;
+	PyObject *item;
+	PyObject *iterator;
+	int rev, minrev = -1;
+	char *node;
+
+	if (!PySet_Check(roots))
+		return -2;
+	iterator = PyObject_GetIter(roots);
+	if (iterator == NULL)
+		return -2;
+	while ((item = PyIter_Next(iterator))) {
+		if (node_check(item, &node) == -1)
+			goto failed;
+		rev = index_find_node(self, node, 20);
+		/* null is implicitly public, so negative is invalid */
+		if (rev < 0 || rev >= len)
+			goto failed;
+		phases[rev] = phase;
+		if (minrev == -1 || minrev > rev)
+			minrev = rev;
+		Py_DECREF(item);
+	}
+	Py_DECREF(iterator);
+	return minrev;
+failed:
+	Py_DECREF(iterator);
+	Py_DECREF(item);
+	return -2;
+}
+
 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
 {
-	PyObject *roots = Py_None;
+	/* 0: public (untracked), 1: draft, 2: secret, 32: archive,
+	   96: internal */
+	static const char trackedphases[] = {1, 2, 32, 96};
 	PyObject *ret = NULL;
-	PyObject *phasessize = NULL;
+	PyObject *roots = Py_None;
+	PyObject *idx = NULL;
+	PyObject *pyphase = NULL;
+	PyObject *pyrev = NULL;
 	PyObject *phaseroots = NULL;
-	PyObject *phaseset = NULL;
-	PyObject *phasessetlist = NULL;
-	PyObject *rev = NULL;
+	PyObject *phasessize = NULL;
+	PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
 	Py_ssize_t len = index_length(self);
-	Py_ssize_t numphase = 0;
-	Py_ssize_t minrevallphases = 0;
-	Py_ssize_t minrevphase = 0;
-	Py_ssize_t i = 0;
+	const char *currentphase;
 	char *phases = NULL;
-	long phase;
+	int minphaserev = -1, rev, i;
+	const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
 
 	if (!PyArg_ParseTuple(args, "O", &roots))
-		goto done;
-	if (roots == NULL || !PyList_Check(roots)) {
-		PyErr_SetString(PyExc_TypeError, "roots must be a list");
-		goto done;
+		return NULL;
+	if (roots == NULL || !PyDict_Check(roots)) {
+		PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
+		return NULL;
 	}
 
-	phases = calloc(
-	    len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
+	phases = calloc(len, 1);
 	if (phases == NULL) {
 		PyErr_NoMemory();
-		goto done;
+		return NULL;
 	}
-	/* Put the phase information of all the roots in phases */
-	numphase = PyList_GET_SIZE(roots) + 1;
-	minrevallphases = len + 1;
-	phasessetlist = PyList_New(numphase);
-	if (phasessetlist == NULL)
-		goto done;
+
+	for (i = 0; i < numphases; ++i) {
+		pyphase = PyInt_FromLong(trackedphases[i]);
+		if (pyphase == NULL)
+			goto release;
+		phaseroots = PyDict_GetItem(roots, pyphase);
+		Py_DECREF(pyphase);
+		if (phaseroots == NULL)
+			continue;
+		rev = add_roots_get_min(self, phaseroots, phases, trackedphases[i]);
+		phaseroots = NULL;
+		if (rev == -2)
+			goto release;
+		if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
+			minphaserev = rev;
+	}
+
+	for (i = 0; i < numphases; ++i) {
+		phasesets[i] = PySet_New(NULL);
+		if (phasesets[i] == NULL)
+			goto release;
+	}
 
-	PyList_SET_ITEM(phasessetlist, 0, Py_None);
-	Py_INCREF(Py_None);
-
-	for (i = 0; i < numphase - 1; i++) {
-		phaseroots = PyList_GET_ITEM(roots, i);
-		phaseset = PySet_New(NULL);
-		if (phaseset == NULL)
+	if (minphaserev == -1)
+		minphaserev = len;
+	for (rev = minphaserev; rev < len; ++rev) {
+		int parents[2];
+		/*
+		 * The parent lookup could be skipped for phaseroots, but
+		 * phase --force would historically not recompute them
+		 * correctly, leaving descendents with a lower phase around.
+		 * As such, unconditionally recompute the phase.
+		 */
+		if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
 			goto release;
-		PyList_SET_ITEM(phasessetlist, i + 1, phaseset);
-		if (!PyList_Check(phaseroots)) {
-			PyErr_SetString(PyExc_TypeError,
-			                "roots item must be a list");
+		set_phase_from_parents(phases, parents[0], parents[1], rev);
+		switch (phases[rev]) {
+		case 0:
+			continue;
+		case 1:
+			pyphase = phasesets[0];
+			break;
+		case 2:
+			pyphase = phasesets[1];
+			break;
+		case 32:
+			pyphase = phasesets[2];
+			break;
+		case 96:
+			pyphase = phasesets[3];
+			break;
+		default:
 			goto release;
 		}
-		minrevphase =
-		    add_roots_get_min(self, phaseroots, i + 1, phases);
-		if (minrevphase == -2) /* Error from add_roots_get_min */
+		pyrev = PyInt_FromLong(rev);
+		if (pyrev == NULL)
 			goto release;
-		minrevallphases = MIN(minrevallphases, minrevphase);
+		if (PySet_Add(pyphase, pyrev) == -1) {
+			Py_DECREF(pyrev);
+			goto release;
+		}
+		Py_DECREF(pyrev);
 	}
-	/* Propagate the phase information from the roots to the revs */
-	if (minrevallphases != -1) {
-		int parents[2];
-		for (i = minrevallphases; i < len; i++) {
-			if (index_get_parents(self, i, parents, (int)len - 1) <
-			    0)
-				goto release;
-			set_phase_from_parents(phases, parents[0], parents[1],
-			                       i);
+	phaseroots = PyDict_New();
+	if (phaseroots == NULL)
+		goto release;
+	for (int i = 0; i < numphases; ++i) {
+		pyphase = PyInt_FromLong(trackedphases[i]);
+		if (pyphase == NULL)
+			goto release;
+		if (PyDict_SetItem(phaseroots, pyphase, phasesets[i]) == -1) {
+			Py_DECREF(pyphase);
+			goto release;
 		}
+		Py_DECREF(phasesets[i]);
+		phasesets[i] = NULL;
 	}
-	/* Transform phase list to a python list */
 	phasessize = PyInt_FromSsize_t(len);
 	if (phasessize == NULL)
 		goto release;
-	for (i = 0; i < len; i++) {
-		phase = phases[i];
-		/* We only store the sets of phase for non public phase, the
-		 * public phase is computed as a difference */
-		if (phase != 0) {
-			phaseset = PyList_GET_ITEM(phasessetlist, phase);
-			rev = PyInt_FromSsize_t(i);
-			if (rev == NULL)
-				goto release;
-			PySet_Add(phaseset, rev);
-			Py_XDECREF(rev);
-		}
-	}
-	ret = PyTuple_Pack(2, phasessize, phasessetlist);
+
+	ret = PyTuple_Pack(2, phasessize, phaseroots);
+	Py_DECREF(phasessize);
+	Py_DECREF(phaseroots);
+	return ret;
 
 release:
-	Py_XDECREF(phasessize);
-	Py_XDECREF(phasessetlist);
-done:
+	for (i = 0; i < numphases; ++i)
+		Py_XDECREF(phasesets[i]);
+	Py_XDECREF(phaseroots);
+
 	free(phases);
-	return ret;
+	return NULL;
 }
 
 static PyObject *index_headrevs(indexObject *self, PyObject *args)
diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c
--- a/mercurial/cext/parsers.c
+++ b/mercurial/cext/parsers.c
@@ -667,7 +667,7 @@ 
 void manifest_module_init(PyObject *mod);
 void revlog_module_init(PyObject *mod);
 
-static const int version = 16;
+static const int version = 17;
 
 static void module_init(PyObject *mod)
 {