Patchwork [1,of,2] phases: add set per phase in C phase computation

login
register
mail settings
Submitter Laurent Charignon
Date May 4, 2015, 8:06 p.m.
Message ID <73bd80fd6a2f41805e81.1430769965@lcharignon-mbp.local>
Download mbox | patch
Permalink /patch/8873/
State Superseded
Commit 22438cfd11b50c9bb8778dfb43167becf573e193
Delegated to: Augie Fackler
Headers show

Comments

Laurent Charignon - May 4, 2015, 8:06 p.m.
# HG changeset patch
# User Laurent Charignon <lcharignon@fb.com>
# Date 1427912237 25200
#      Wed Apr 01 11:17:17 2015 -0700
# Node ID 73bd80fd6a2f41805e81f981a6fd48c04a496300
# Parent  e9edd53770fb77a9787a3e6592a3bf0a29c1bd80
phases: add set per phase in C phase computation

To speed up the computation of draft(), secret(), divergent(), obsolete() and
unstable() we need to have a fast way of getting the list of revisions that
are in draft(), secret() or the union of both: not public().

This patch extends the work on phase computation in C and make the phase
computation code also return a list of set for each non public phase.
Using these sets we can quickly obtain all the revisions of a given phase.
We do not return a set for the public phase as we expect it to be roughly the
size of the repo. Also, it can be computed easily by substracting the entries in the
non public phases from all the revs in the repo.
Augie Fackler - May 5, 2015, 2:08 p.m.
On Mon, May 04, 2015 at 01:06:05PM -0700, Laurent Charignon wrote:
> # HG changeset patch
> # User Laurent Charignon <lcharignon@fb.com>
> # Date 1427912237 25200
> #      Wed Apr 01 11:17:17 2015 -0700
> # Node ID 73bd80fd6a2f41805e81f981a6fd48c04a496300
> # Parent  e9edd53770fb77a9787a3e6592a3bf0a29c1bd80
> phases: add set per phase in C phase computation

parsers.c is already enormous. Can I get you to move the
phase-computing bits to a new file in the same .so? Maybe phases.c?
I'm just starting to worry about the amount of stuff kicking around in
parsers.

(See how manifest.c is included in parsers.so for an example.)

>
> To speed up the computation of draft(), secret(), divergent(), obsolete() and
> unstable() we need to have a fast way of getting the list of revisions that
> are in draft(), secret() or the union of both: not public().
>
> This patch extends the work on phase computation in C and make the phase
> computation code also return a list of set for each non public phase.
> Using these sets we can quickly obtain all the revisions of a given phase.
> We do not return a set for the public phase as we expect it to be roughly the
> size of the repo. Also, it can be computed easily by substracting the entries in the
> non public phases from all the revs in the repo.
>
> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
> --- a/mercurial/parsers.c
> +++ b/mercurial/parsers.c
> @@ -1074,11 +1074,14 @@
>  static PyObject *compute_phases(indexObject *self, PyObject *args)
>  {
>       PyObject *roots = Py_None;
> +	PyObject *ret = NULL;
>       PyObject *phaseslist = NULL;
>       PyObject *phaseroots = NULL;
>       PyObject *rev = NULL;
>       PyObject *p1 = NULL;
>       PyObject *p2 = NULL;
> +	PyObject *phaseset = NULL;
> +	PyObject *phasessetlist = NULL;
>       Py_ssize_t addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
>       Py_ssize_t len = index_length(self) - 1;
>       Py_ssize_t numphase = 0;
> @@ -1088,6 +1091,7 @@
>       int parent_1, parent_2;
>       char *phases = NULL;
>       const char *data;
> +	long phase;
>
>       if (!PyArg_ParseTuple(args, "O", &roots))
>               goto release_none;
> @@ -1100,13 +1104,24 @@
>       /* Put the phase information of all the roots in phases */
>       numphase = PyList_GET_SIZE(roots)+1;
>       minrevallphases = len + 1;
> +	phasessetlist = PyList_New(numphase);
> +	if (phasessetlist == NULL)
> +		goto release_none;
> +
> +	PyList_SET_ITEM(phasessetlist, 0, Py_None);
> +	Py_INCREF(Py_None);
> +
>       for (i = 0; i < numphase-1; i++) {
>               phaseroots = PyList_GET_ITEM(roots, i);
> +		phaseset = PySet_New(NULL);
> +		if (phaseset == NULL)
> +			goto release_phasesetlist;
> +		PyList_SET_ITEM(phasessetlist, i+1, phaseset);
>               if (!PyList_Check(phaseroots))
> -			goto release_phases;
> +			goto release_phasesetlist;
>               minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
>               if (minrevphase == -2) /* Error from add_roots_get_min */
> -			goto release_phases;
> +			goto release_phasesetlist;
>               minrevallphases = MIN(minrevallphases, minrevphase);
>       }
>       /* Propagate the phase information from the roots to the revs */
> @@ -1121,7 +1136,7 @@
>                       p2 = PyTuple_GET_ITEM(rev, 6);
>                       if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
>                               PyErr_SetString(PyExc_TypeError, "revlog parents are invalid");
> -				goto release_phases;
> +				goto release_phasesetlist;
>                       }
>                       parent_1 = (int)PyInt_AS_LONG(p1);
>                       parent_2 = (int)PyInt_AS_LONG(p2);
> @@ -1131,14 +1146,35 @@
>       /* Transform phase list to a python list */
>       phaseslist = PyList_New(len);
>       if (phaseslist == NULL)
> -		goto release_phases;
> -	for (i = 0; i < len; i++)
> -		PyList_SET_ITEM(phaseslist, i, PyInt_FromLong(phases[i]));
> +		goto release_phasesetlist;
> +	for (i = 0; i < len; i++) {
> +		phase = phases[i];
> +		/* We only store the sets of phase for non public phase, the public phase
> +              * is computed as a difference */
> +		if (phase != 0) {
> +			phaseset = PyList_GET_ITEM(phasessetlist, phase);
> +			PySet_Add(phaseset, PyInt_FromLong(i));
> +		}
> +		PyList_SET_ITEM(phaseslist, i, PyInt_FromLong(phase));
> +	}
> +	ret = PyList_New(2);
> +	if (ret == NULL)
> +		goto release_phaseslist;
>
> +	PyList_SET_ITEM(ret, 0, phaseslist);
> +	PyList_SET_ITEM(ret, 1, phasessetlist);
> +	/* We don't release phaseslist and phasessetlist as we return them to
> +      * python */
> +	goto release_phases;
> +
> +release_phaseslist:
> +	Py_XDECREF(phaseslist);
> +release_phasesetlist:
> +	Py_XDECREF(phasessetlist);
>  release_phases:
>       free(phases);
>  release_none:
> -	return phaseslist;
> +	return ret;
>  }
>
>  static PyObject *index_headrevs(indexObject *self, PyObject *args)
> diff --git a/mercurial/phases.py b/mercurial/phases.py
> --- a/mercurial/phases.py
> +++ b/mercurial/phases.py
> @@ -99,7 +99,6 @@
>  - content is pushed as draft
>
>  """
> -
>  import os
>  import errno
>  from node import nullid, nullrev, bin, hex, short
> @@ -155,6 +154,7 @@
>              # Cheap trick to allow shallow-copy without copy module
>              self.phaseroots, self.dirty = _readroots(repo, phasedefaults)
>              self._phaserevs = None
> +            self._phasesets = None
>              self.filterunknown(repo)
>              self.opener = repo.svfs
>
> @@ -191,7 +191,6 @@
>                      revs[rev] = phase
>                  for rev in repo.changelog.descendants(roots):
>                      revs[rev] = phase
> -
>      def getphaserevs(self, repo):
>          if self._phaserevs is None:
>              try:
> @@ -199,7 +198,8 @@
>                                        'nativephaseskillswitch'):
>                      self._computephaserevspure(repo)
>                  else:
> -                    self._phaserevs = self._getphaserevsnative(repo)
> +                    res = self._getphaserevsnative(repo)
> +                    self._phaserevs, self._phasesets = res
>              except AttributeError:
>                  self._computephaserevspure(repo)
>          return self._phaserevs
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
Matt Mackall - May 12, 2015, 6:18 p.m.
On Mon, 2015-05-04 at 13:06 -0700, Laurent Charignon wrote:
> # HG changeset patch
> # User Laurent Charignon <lcharignon@fb.com>
> # Date 1427912237 25200
> #      Wed Apr 01 11:17:17 2015 -0700
> # Node ID 73bd80fd6a2f41805e81f981a6fd48c04a496300
> # Parent  e9edd53770fb77a9787a3e6592a3bf0a29c1bd80
> phases: add set per phase in C phase computation
> @@ -191,7 +191,6 @@
>                      revs[rev] = phase
>                  for rev in repo.changelog.descendants(roots):
>                      revs[rev] = phase
> -
>      def getphaserevs(self, repo):
>          if self._phaserevs is None:
>              try:

Oops

> @@ -199,7 +198,8 @@
>                                        'nativephaseskillswitch'):
>                      self._computephaserevspure(repo)
>                  else:
> -                    self._phaserevs = self._getphaserevsnative(repo)
> +                    res = self._getphaserevsnative(repo)
> +                    self._phaserevs, self._phasesets = res

You've changed the prototype of the function.. thus negating all the
effort to not require a recompile when updating across this revision.
If you must, you should change the name of the function so that older
Mercurial won't try to use it.
Laurent Charignon - May 12, 2015, 7:19 p.m.
On 5/12/15, 11:18 AM, "Matt Mackall" <mpm@selenic.com> wrote:

>On Mon, 2015-05-04 at 13:06 -0700, Laurent Charignon wrote:
>> # HG changeset patch
>> # User Laurent Charignon <lcharignon@fb.com>
>> # Date 1427912237 25200
>> #      Wed Apr 01 11:17:17 2015 -0700
>> # Node ID 73bd80fd6a2f41805e81f981a6fd48c04a496300
>> # Parent  e9edd53770fb77a9787a3e6592a3bf0a29c1bd80
>> phases: add set per phase in C phase computation
>> @@ -191,7 +191,6 @@
>>                      revs[rev] = phase
>>                  for rev in repo.changelog.descendants(roots):
>>                      revs[rev] = phase
>> -
>>      def getphaserevs(self, repo):
>>          if self._phaserevs is None:
>>              try:
>
>Oops
>
>> @@ -199,7 +198,8 @@
>>                                        'nativephaseskillswitch'):
>>                      self._computephaserevspure(repo)
>>                  else:
>> -                    self._phaserevs = self._getphaserevsnative(repo)
>> +                    res = self._getphaserevsnative(repo)
>> +                    self._phaserevs, self._phasesets = res
>
>You've changed the prototype of the function.. thus negating all the
>effort to not require a recompile when updating across this revision.
>If you must, you should change the name of the function so that older
>Mercurial won't try to use it.

Ok, I will fix that, thanks!


>
>-- 
>Mathematics is the supreme nostalgia of our time.
>

Patch

diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -1074,11 +1074,14 @@ 
 static PyObject *compute_phases(indexObject *self, PyObject *args)
 {
 	PyObject *roots = Py_None;
+	PyObject *ret = NULL;
 	PyObject *phaseslist = NULL;
 	PyObject *phaseroots = NULL;
 	PyObject *rev = NULL;
 	PyObject *p1 = NULL;
 	PyObject *p2 = NULL;
+	PyObject *phaseset = NULL;
+	PyObject *phasessetlist = NULL;
 	Py_ssize_t addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
 	Py_ssize_t len = index_length(self) - 1;
 	Py_ssize_t numphase = 0;
@@ -1088,6 +1091,7 @@ 
 	int parent_1, parent_2;
 	char *phases = NULL;
 	const char *data;
+	long phase;
 
 	if (!PyArg_ParseTuple(args, "O", &roots))
 		goto release_none;
@@ -1100,13 +1104,24 @@ 
 	/* Put the phase information of all the roots in phases */
 	numphase = PyList_GET_SIZE(roots)+1;
 	minrevallphases = len + 1;
+	phasessetlist = PyList_New(numphase);
+	if (phasessetlist == NULL)
+		goto release_none;
+
+	PyList_SET_ITEM(phasessetlist, 0, Py_None);
+	Py_INCREF(Py_None);
+
 	for (i = 0; i < numphase-1; i++) {
 		phaseroots = PyList_GET_ITEM(roots, i);
+		phaseset = PySet_New(NULL);
+		if (phaseset == NULL)
+			goto release_phasesetlist;
+		PyList_SET_ITEM(phasessetlist, i+1, phaseset);
 		if (!PyList_Check(phaseroots))
-			goto release_phases;
+			goto release_phasesetlist;
 		minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
 		if (minrevphase == -2) /* Error from add_roots_get_min */
-			goto release_phases;
+			goto release_phasesetlist;
 		minrevallphases = MIN(minrevallphases, minrevphase);
 	}
 	/* Propagate the phase information from the roots to the revs */
@@ -1121,7 +1136,7 @@ 
 			p2 = PyTuple_GET_ITEM(rev, 6);
 			if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
 				PyErr_SetString(PyExc_TypeError, "revlog parents are invalid");
-				goto release_phases;
+				goto release_phasesetlist;
 			}
 			parent_1 = (int)PyInt_AS_LONG(p1);
 			parent_2 = (int)PyInt_AS_LONG(p2);
@@ -1131,14 +1146,35 @@ 
 	/* Transform phase list to a python list */
 	phaseslist = PyList_New(len);
 	if (phaseslist == NULL)
-		goto release_phases;
-	for (i = 0; i < len; i++)
-		PyList_SET_ITEM(phaseslist, i, PyInt_FromLong(phases[i]));
+		goto release_phasesetlist;
+	for (i = 0; i < len; i++) {
+		phase = phases[i];
+		/* We only store the sets of phase for non public phase, the public phase
+		 * is computed as a difference */
+		if (phase != 0) {
+			phaseset = PyList_GET_ITEM(phasessetlist, phase);
+			PySet_Add(phaseset, PyInt_FromLong(i));
+		}
+		PyList_SET_ITEM(phaseslist, i, PyInt_FromLong(phase));
+	}
+	ret = PyList_New(2);
+	if (ret == NULL)
+		goto release_phaseslist;
 
+	PyList_SET_ITEM(ret, 0, phaseslist);
+	PyList_SET_ITEM(ret, 1, phasessetlist);
+	/* We don't release phaseslist and phasessetlist as we return them to
+	 * python */
+	goto release_phases;
+
+release_phaseslist:
+	Py_XDECREF(phaseslist);
+release_phasesetlist:
+	Py_XDECREF(phasessetlist);
 release_phases:
 	free(phases);
 release_none:
-	return phaseslist;
+	return ret;
 }
 
 static PyObject *index_headrevs(indexObject *self, PyObject *args)
diff --git a/mercurial/phases.py b/mercurial/phases.py
--- a/mercurial/phases.py
+++ b/mercurial/phases.py
@@ -99,7 +99,6 @@ 
 - content is pushed as draft
 
 """
-
 import os
 import errno
 from node import nullid, nullrev, bin, hex, short
@@ -155,6 +154,7 @@ 
             # Cheap trick to allow shallow-copy without copy module
             self.phaseroots, self.dirty = _readroots(repo, phasedefaults)
             self._phaserevs = None
+            self._phasesets = None
             self.filterunknown(repo)
             self.opener = repo.svfs
 
@@ -191,7 +191,6 @@ 
                     revs[rev] = phase
                 for rev in repo.changelog.descendants(roots):
                     revs[rev] = phase
-
     def getphaserevs(self, repo):
         if self._phaserevs is None:
             try:
@@ -199,7 +198,8 @@ 
                                       'nativephaseskillswitch'):
                     self._computephaserevspure(repo)
                 else:
-                    self._phaserevs = self._getphaserevsnative(repo)
+                    res = self._getphaserevsnative(repo)
+                    self._phaserevs, self._phasesets = res
             except AttributeError:
                 self._computephaserevspure(repo)
         return self._phaserevs