Patchwork [6,of,7,V4] delta: have a native implementation of _findsnapshot

login
register
mail settings
Submitter Boris Feld
Date Dec. 30, 2018, 6:43 p.m.
Message ID <a6bb015a9fb1a2c23d73.1546195433@Laptop-Boris.lan>
Download mbox | patch
Permalink /patch/37397/
State Accepted
Headers show

Comments

Boris Feld - Dec. 30, 2018, 6:43 p.m.
# HG changeset patch
# User Boris Feld <boris.feld@octobus.net>
# Date 1545297320 -3600
#      Thu Dec 20 10:15:20 2018 +0100
# Node ID a6bb015a9fb1a2c23d73f1c55e7d4160a9701191
# Parent  5aff42c50dd1bc1e7c2e533a8e052fa42c6f9304
# EXP-Topic sparse-revlog
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r a6bb015a9fb1
delta: have a native implementation of _findsnapshot

The function might traverse a lot of revision, a native implementation get
significantly faster.

example affected manifest write
before: 0.114989
after:  0.067141 (-42%)
Yuya Nishihara - Jan. 2, 2019, 3:04 a.m.
On Sun, 30 Dec 2018 19:43:53 +0100, Boris Feld wrote:
> # HG changeset patch
> # User Boris Feld <boris.feld@octobus.net>
> # Date 1545297320 -3600
> #      Thu Dec 20 10:15:20 2018 +0100
> # Node ID a6bb015a9fb1a2c23d73f1c55e7d4160a9701191
> # Parent  5aff42c50dd1bc1e7c2e533a8e052fa42c6f9304
> # EXP-Topic sparse-revlog
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r a6bb015a9fb1
> delta: have a native implementation of _findsnapshot
> 
> The function might traverse a lot of revision, a native implementation get
> significantly faster.
> 
> example affected manifest write
> before: 0.114989
> after:  0.067141 (-42%)
> 
> diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
> --- a/mercurial/cext/revlog.c
> +++ b/mercurial/cext/revlog.c
> @@ -1050,6 +1050,80 @@ static PyObject *index_issnapshot(indexO
>  	return PyBool_FromLong((long)issnap);
>  }
>  
> +static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
> +{
> +
> +	Py_ssize_t start_rev;
> +	PyObject *cache;
> +	Py_ssize_t base;
> +	Py_ssize_t rev;
> +	int issnap;

> +	PyObject *key;
> +	bool newallvalues;
> +	PyObject *allvalues;
> +	PyObject *value;

These variables have to be initialized since we do XDECREF() on failure.

> +	if (!PyArg_ParseTuple(args, "On", &cache, &start_rev)) {
> +		return NULL;
> +	}
> +	if (!PyDict_Check(cache)) {
> +		PyErr_SetString(PyExc_TypeError,
> +		                "cache argument must be a dict");
> +		return NULL;
> +	}

Nit: maybe this can be "O!" with &PyDict_Type.

> +	for (rev = start_rev; rev < length; rev++) {
> +		issnap = index_issnapshotrev(self, rev);
> +		if (issnap < 0) {
> +			goto bail;
> +		}
> +		if (issnap == 0) {
> +			continue;
> +		}
> +		base = (Py_ssize_t)index_baserev(self, rev);
> +		if (base == rev) {
> +			base = -1;
> +		}
> +		if (base == -2) {
> +			assert(PyErr_Occurred());
> +			goto bail;
> +		}
> +		key = PyInt_FromSsize_t(base);
> +		newallvalues = false;
> +		allvalues = PyDict_GetItem(cache, key);
> +		if (allvalues == NULL && PyErr_Occurred()) {
> +			goto bail;
> +		}
> +		if (allvalues == NULL) {
> +			allvalues = PyList_New(0);

PyList_New() may fail in theory.

> +			newallvalues = true;
> +			if (PyDict_SetItem(cache, key, allvalues) < 0) {
> +				goto bail;
> +			}

I prefer cleaning up the allvalues reference here for this particular case,
so that the scope of the allvalues can be narrowed and the newallvalues can
be eliminated.

  for (rev ...) {
      PyObject *allvalues; /* borrowed */
      ...
      if (allvalues == NULL) {
          int r;
          allvalues = PyList_New(0);
          if (!allvalues)
              goto bail;
          r = PyDict_SetItem(cache, key, allvalues);
          Py_DECREF(allvalues);
          if (r < 0)
              goto bail;
      }

> +		Py_XDECREF(key);
> +		if (newallvalues) {
> +			Py_XDECREF(allvalues);
> +		}
> +		Py_XDECREF(value);
> +		key = NULL;
> +		allvalues = NULL;
> +		value = NULL;

Nit: Py_CLEAR() could be used in place of XDECREF() + = NULL.

> +	}
> +	Py_INCREF(Py_None);
> +	return Py_None;

Nit: can be Py_RETURN_NONE;
Boris Feld - Jan. 2, 2019, 10:38 p.m.
On 02/01/2019 04:04, Yuya Nishihara wrote:
> On Sun, 30 Dec 2018 19:43:53 +0100, Boris Feld wrote:
>> # HG changeset patch
>> # User Boris Feld <boris.feld@octobus.net>
>> # Date 1545297320 -3600
>> #      Thu Dec 20 10:15:20 2018 +0100
>> # Node ID a6bb015a9fb1a2c23d73f1c55e7d4160a9701191
>> # Parent  5aff42c50dd1bc1e7c2e533a8e052fa42c6f9304
>> # EXP-Topic sparse-revlog
>> # Available At https://bitbucket.org/octobus/mercurial-devel/
>> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r a6bb015a9fb1
>> delta: have a native implementation of _findsnapshot
>>
>> The function might traverse a lot of revision, a native implementation get
>> significantly faster.
>>
>> example affected manifest write
>> before: 0.114989
>> after:  0.067141 (-42%)
>>
>> diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
>> --- a/mercurial/cext/revlog.c
>> +++ b/mercurial/cext/revlog.c
>> @@ -1050,6 +1050,80 @@ static PyObject *index_issnapshot(indexO
>>  	return PyBool_FromLong((long)issnap);
>>  }
>>  
>> +static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
>> +{
>> +
>> +	Py_ssize_t start_rev;
>> +	PyObject *cache;
>> +	Py_ssize_t base;
>> +	Py_ssize_t rev;
>> +	int issnap;
>> +	PyObject *key;
>> +	bool newallvalues;
>> +	PyObject *allvalues;
>> +	PyObject *value;
> These variables have to be initialized since we do XDECREF() on failure.
Good catch, thanks!
>
>> +	if (!PyArg_ParseTuple(args, "On", &cache, &start_rev)) {
>> +		return NULL;
>> +	}
>> +	if (!PyDict_Check(cache)) {
>> +		PyErr_SetString(PyExc_TypeError,
>> +		                "cache argument must be a dict");
>> +		return NULL;
>> +	}
> Nit: maybe this can be "O!" with &PyDict_Type.
I can't find if O! match subtype or only the exact type. I've delayed
this update for now.
>
>> +	for (rev = start_rev; rev < length; rev++) {
>> +		issnap = index_issnapshotrev(self, rev);
>> +		if (issnap < 0) {
>> +			goto bail;
>> +		}
>> +		if (issnap == 0) {
>> +			continue;
>> +		}
>> +		base = (Py_ssize_t)index_baserev(self, rev);
>> +		if (base == rev) {
>> +			base = -1;
>> +		}
>> +		if (base == -2) {
>> +			assert(PyErr_Occurred());
>> +			goto bail;
>> +		}
>> +		key = PyInt_FromSsize_t(base);
>> +		newallvalues = false;
>> +		allvalues = PyDict_GetItem(cache, key);
>> +		if (allvalues == NULL && PyErr_Occurred()) {
>> +			goto bail;
>> +		}
>> +		if (allvalues == NULL) {
>> +			allvalues = PyList_New(0);
> PyList_New() may fail in theory.
>
>> +			newallvalues = true;
>> +			if (PyDict_SetItem(cache, key, allvalues) < 0) {
>> +				goto bail;
>> +			}
> I prefer cleaning up the allvalues reference here for this particular case,
> so that the scope of the allvalues can be narrowed and the newallvalues can
> be eliminated.
>
>   for (rev ...) {
>       PyObject *allvalues; /* borrowed */
>       ...
>       if (allvalues == NULL) {
>           int r;
>           allvalues = PyList_New(0);
>           if (!allvalues)
>               goto bail;
>           r = PyDict_SetItem(cache, key, allvalues);
>           Py_DECREF(allvalues);
>           if (r < 0)
>               goto bail;
>       }
>
>> +		Py_XDECREF(key);
>> +		if (newallvalues) {
>> +			Py_XDECREF(allvalues);
>> +		}
>> +		Py_XDECREF(value);
>> +		key = NULL;
>> +		allvalues = NULL;
>> +		value = NULL;
> Nit: Py_CLEAR() could be used in place of XDECREF() + = NULL.
>
>> +	}
>> +	Py_INCREF(Py_None);
>> +	return Py_None;
> Nit: can be Py_RETURN_NONE;
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Yuya Nishihara - Jan. 3, 2019, 8:57 a.m.
On Wed, 2 Jan 2019 23:38:15 +0100, Boris FELD wrote:
> On 02/01/2019 04:04, Yuya Nishihara wrote:
> > On Sun, 30 Dec 2018 19:43:53 +0100, Boris Feld wrote:
> >> # HG changeset patch
> >> # User Boris Feld <boris.feld@octobus.net>
> >> # Date 1545297320 -3600
> >> #      Thu Dec 20 10:15:20 2018 +0100
> >> # Node ID a6bb015a9fb1a2c23d73f1c55e7d4160a9701191
> >> # Parent  5aff42c50dd1bc1e7c2e533a8e052fa42c6f9304
> >> # EXP-Topic sparse-revlog
> >> # Available At https://bitbucket.org/octobus/mercurial-devel/
> >> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r a6bb015a9fb1
> >> delta: have a native implementation of _findsnapshot

> >> +	if (!PyArg_ParseTuple(args, "On", &cache, &start_rev)) {
> >> +		return NULL;
> >> +	}
> >> +	if (!PyDict_Check(cache)) {
> >> +		PyErr_SetString(PyExc_TypeError,
> >> +		                "cache argument must be a dict");
> >> +		return NULL;
> >> +	}
> > Nit: maybe this can be "O!" with &PyDict_Type.
> I can't find if O! match subtype or only the exact type. I've delayed
> this update for now.

It calls PyType_IsSubtype(), so plain subtypes should be accepted.

https://github.com/python/cpython/blob/2.7/Python/getargs.c#L1258

Patch

diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -1050,6 +1050,80 @@  static PyObject *index_issnapshot(indexO
 	return PyBool_FromLong((long)issnap);
 }
 
+static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
+{
+
+	Py_ssize_t start_rev;
+	PyObject *cache;
+	Py_ssize_t base;
+	Py_ssize_t rev;
+	int issnap;
+	PyObject *key;
+	bool newallvalues;
+	PyObject *allvalues;
+	PyObject *value;
+	const Py_ssize_t length = index_length(self);
+	if (!PyArg_ParseTuple(args, "On", &cache, &start_rev)) {
+		return NULL;
+	}
+	if (!PyDict_Check(cache)) {
+		PyErr_SetString(PyExc_TypeError,
+		                "cache argument must be a dict");
+		return NULL;
+	}
+	for (rev = start_rev; rev < length; rev++) {
+		issnap = index_issnapshotrev(self, rev);
+		if (issnap < 0) {
+			goto bail;
+		}
+		if (issnap == 0) {
+			continue;
+		}
+		base = (Py_ssize_t)index_baserev(self, rev);
+		if (base == rev) {
+			base = -1;
+		}
+		if (base == -2) {
+			assert(PyErr_Occurred());
+			goto bail;
+		}
+		key = PyInt_FromSsize_t(base);
+		newallvalues = false;
+		allvalues = PyDict_GetItem(cache, key);
+		if (allvalues == NULL && PyErr_Occurred()) {
+			goto bail;
+		}
+		if (allvalues == NULL) {
+			allvalues = PyList_New(0);
+			newallvalues = true;
+			if (PyDict_SetItem(cache, key, allvalues) < 0) {
+				goto bail;
+			}
+		}
+		value = PyInt_FromSsize_t(rev);
+		if (PyList_Append(allvalues, value)) {
+			goto bail;
+		}
+		Py_XDECREF(key);
+		if (newallvalues) {
+			Py_XDECREF(allvalues);
+		}
+		Py_XDECREF(value);
+		key = NULL;
+		allvalues = NULL;
+		value = NULL;
+	}
+	Py_INCREF(Py_None);
+	return Py_None;
+bail:
+	Py_XDECREF(key);
+	if (newallvalues) {
+		Py_XDECREF(allvalues);
+	}
+	Py_XDECREF(value);
+	return NULL;
+}
+
 static PyObject *index_deltachain(indexObject *self, PyObject *args)
 {
 	int rev, generaldelta;
@@ -2664,6 +2738,8 @@  static PyMethodDef index_methods[] = {
      "get filtered head revisions"}, /* Can always do filtering */
     {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
      "True if the object is a snapshot"},
+    {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
+     "Gather snapshot data in a cache dict"},
     {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
      "determine revisions with deltas to reconstruct fulltext"},
     {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -30,6 +30,7 @@  from ..thirdparty import (
 from .. import (
     error,
     mdiff,
+    util,
 )
 
 # maximum <delta-chain-data>/<revision-text-length> ratio
@@ -688,11 +689,14 @@  def _candidategroups(revlog, textlen, p1
 
 def _findsnapshots(revlog, cache, start_rev):
     """find snapshot from start_rev to tip"""
-    deltaparent = revlog.deltaparent
-    issnapshot = revlog.issnapshot
-    for rev in revlog.revs(start_rev):
-        if issnapshot(rev):
-            cache[deltaparent(rev)].append(rev)
+    if util.safehasattr(revlog.index, 'findsnapshots'):
+        revlog.index.findsnapshots(cache, start_rev)
+    else:
+        deltaparent = revlog.deltaparent
+        issnapshot = revlog.issnapshot
+        for rev in revlog.revs(start_rev):
+            if issnapshot(rev):
+                cache[deltaparent(rev)].append(rev)
 
 def _refinedgroups(revlog, p1, p2, cachedelta):
     good = None