From patchwork Fri Dec 28 18:12:53 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [7,of,8,V3] delta: have a native implementation of _findsnapshot From: Boris Feld X-Patchwork-Id: 37384 Message-Id: To: mercurial-devel@mercurial-scm.org Date: Fri, 28 Dec 2018 19:12:53 +0100 # HG changeset patch # User Boris Feld # Date 1545297320 -3600 # Thu Dec 20 10:15:20 2018 +0100 # Node ID d5912247e9a798f3a329794f64ce0627af0ba107 # Parent f4307f117d823efa9497a8ffa46bf13f68a16c2d # EXP-Topic sparse-revlog # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r d5912247e9a7 delta: have a native implementation of _findsnapshot The function might traverse a lot of revision, a native implementation get significantly faster. example affected manifest write before: 0.114989 after: 0.067141 (-42%) diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c +++ b/mercurial/cext/revlog.c @@ -1049,6 +1049,80 @@ static PyObject *index_issnapshot(indexO return PyBool_FromLong((long)issnap); } +static PyObject *index_findsnapshots(indexObject *self, PyObject *args) +{ + + Py_ssize_t start_rev; + PyObject *cache; + Py_ssize_t base; + Py_ssize_t rev; + int issnap; + PyObject *key; + bool newallvalues; + PyObject *allvalues; + PyObject *value; + const Py_ssize_t length = index_length(self); + if (!PyArg_ParseTuple(args, "On", &cache, &start_rev)) { + return NULL; + } + if (!PyDict_Check(cache)) { + PyErr_SetString(PyExc_TypeError, + "cache argument must be a dict"); + return NULL; + } + for (rev = start_rev; rev < length; rev++) { + issnap = index_issnapshotrev(self, rev); + if (issnap < 0) { + goto bail; + } + if (issnap == 0) { + continue; + } + base = (Py_ssize_t)index_baserev(self, rev); + if (base == rev) { + base = -1; + } + if (base == -2) { + assert(PyErr_Occurred()); + goto bail; + } + key = PyInt_FromSsize_t(base); + newallvalues 
= false; + allvalues = PyDict_GetItem(cache, key); + if (allvalues == NULL && PyErr_Occurred()) { + goto bail; + } + if (allvalues == NULL) { + allvalues = PyList_New(0); + newallvalues = true; + if (PyDict_SetItem(cache, key, allvalues) < 0) { + goto bail; + } + } + value = PyInt_FromSsize_t(rev); + if (PyList_Append(allvalues, value)) { + goto bail; + } + Py_XDECREF(key); + if (newallvalues) { + Py_XDECREF(allvalues); + } + Py_XDECREF(value); + key = NULL; + allvalues = NULL; + value = NULL; + } + Py_INCREF(Py_None); + return Py_None; +bail: + Py_XDECREF(key); + if (newallvalues) { + Py_XDECREF(allvalues); + } + Py_XDECREF(value); + return NULL; +} + static PyObject *index_deltachain(indexObject *self, PyObject *args) { int rev, generaldelta; @@ -2663,6 +2737,8 @@ static PyMethodDef index_methods[] = { "get filtered head revisions"}, /* Can always do filtering */ {"issnapshot", (PyCFunction)index_issnapshot, METH_O, "True if the object is a snapshot"}, + {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS, + "Gather snapshot data in a cache dict"}, {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"}, {"slicechunktodensity", (PyCFunction)index_slicechunktodensity, diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py --- a/mercurial/revlogutils/deltas.py +++ b/mercurial/revlogutils/deltas.py @@ -30,6 +30,7 @@ from ..thirdparty import ( from .. 
def _findsnapshots(revlog, cache, start_rev):
    """Collect the snapshot revisions from ``start_rev`` to tip into ``cache``.

    When the index object exposes a ``findsnapshots`` method, the whole scan
    is delegated to it. Otherwise the revisions are walked here and each
    snapshot revision is appended to ``cache[deltaparent(rev)]``.
    """
    if util.safehasattr(revlog.index, 'findsnapshots'):
        revlog.index.findsnapshots(cache, start_rev)
        return

    # Fallback when the index has no ``findsnapshots``: bind the lookups
    # once, then walk the revisions one by one.
    parent_of = revlog.deltaparent
    is_snapshot = revlog.issnapshot
    snapshots = (r for r in revlog.revs(start_rev) if is_snapshot(r))
    for snap in snapshots:
        cache[parent_of(snap)].append(snap)