Patchwork [12,of,14] sparse-revlog: introduce native (C) implementation of slicechunktodensity

login
register
mail settings
Submitter Boris Feld
Date Nov. 12, 2018, 9:55 a.m.
Message ID <4a1104eade1dfb169751.1542016547@localhost.localdomain>
Download mbox | patch
Permalink /patch/36520/
State Accepted
Headers show

Comments

Boris Feld - Nov. 12, 2018, 9:55 a.m.
# HG changeset patch
# User Boris Feld <boris.feld@octobus.net>
# Date 1541785632 -3600
#      Fri Nov 09 18:47:12 2018 +0100
# Node ID 4a1104eade1dfb1697517d60d2c5fd7a98b8c7f0
# Parent  0ea42453fa491793d1e145f5093b65e84cb65e97
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 4a1104eade1d
sparse-revlog: introduce native (C) implementation of slicechunktodensity

This is a C implementation of `_slicechunktodensity` in the
`mercurial/revlogutils/deltas.py` file.

The algorithm involves a lot of integer manipulation and low-level access to
index data. Having a C implementation of it raises a large performance
improvement. See later changeset in this series for details.
Yuya Nishihara - Nov. 12, 2018, 2:50 p.m.
On Mon, 12 Nov 2018 10:55:47 +0100, Boris Feld wrote:
> # HG changeset patch
> # User Boris Feld <boris.feld@octobus.net>
> # Date 1541785632 -3600
> #      Fri Nov 09 18:47:12 2018 +0100
> # Node ID 4a1104eade1dfb1697517d60d2c5fd7a98b8c7f0
> # Parent  0ea42453fa491793d1e145f5093b65e84cb65e97
> # EXP-Topic sparse-perf
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 4a1104eade1d
> sparse-revlog: introduce native (C) implementation of slicechunktodensity

Just quickly scanned this patch, no other patches in this series nor "sparse"
logic is reviewed.

> +struct Gap {
> +	long size;
> +	long idx;
> +};
> +
> +static int gap_compare(const void *left, const void *right)
> +{
> +	return ((struct Gap *)left)->size - ((struct Gap *)right)->size;
> +}
> +static int long_compare(const void *left, const void *right)
> +{
> +	return *(long *)left - *(long *)right;
> +}

Nit: (const <type> *)<left|right> as the argument is const void*.

If we're sure 'left - right' never exceeds the int range, it might be better
to explicitly cast the result to (int) to silence possible warning.

> +static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
> +{
> +	/* method arguments */
> +	PyObject *list_revs = NULL; /* revisions in the chain */
> +	double targetdensity = 0.5; /* min density to achieve */
> +	long mingapsize = 0;        /* threshold to ignore gaps */
> +
> +	/* other core variables */
> +	long i;                  /* used for various iteration */
> +	PyObject *result = NULL; /* the final return of the function */
> +
> +	/* generic information about the delta chain being slice */
> +	Py_ssize_t num_revs = 0; /* size of the full delta chain */
> +	Py_ssize_t *revs = NULL; /* native array of revision in the chain */
> +	long chainpayload = 0;   /* sum of all delta in the chain */
> +	long deltachainspan = 0; /* distance from first byte to last byte */
> +
> +	/* variable used for slicing the delta chain */
> +	long readdata = 0;  /* amount of data currently planned to be read */
> +	double density = 0; /* ration of payload data compared to read ones */
> +	struct Gap *gaps = NULL; /* array of notable gap in the chain */
> +	long num_gaps = 0; /* total number of notable gap recorded so far */
> +	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
> +	long num_selected = 0;               /* number of gaps skipped */

Maybe i, num_rgaps, num_selected should be Py_ssize_t? In general, long can
be narrower than ssize_t.

> +	PyObject *chunk = NULL;              /* individual slice */
> +	PyObject *allchunks = PyList_New(num_selected); /* all slices */

Needs to make sure that allchunks isn't NULL.

And it's probably better to just initialize the list with literal 0, since
we have no for-loop to fill in the list items up to num_selected.

> +	/* parsing argument */
> +	if (!PyArg_ParseTuple(args, "O!dl", &PyList_Type, &list_revs,
> +	                      &targetdensity, &mingapsize)) {
> +		goto bail;
> +	}
> +
> +	/* If the delta chain contains a single element, we do not need slicing
> +	 */
> +	num_revs = PyList_GET_SIZE(list_revs);
> +	if (num_revs <= 1) {
> +		result = PyTuple_Pack(1, list_revs);
> +		goto done;
> +	}
> +
> +	/* Turn the python list into a native integer array (for efficiency) */
> +	revs = (Py_ssize_t *)malloc((num_revs) * sizeof(Py_ssize_t));

num_revs * sizeof(...) can overflow. Using calloc() is safer if the zeroing
cost isn't significant.

> +	if (revs == NULL) {
> +		PyErr_NoMemory();
> +		goto bail;
> +	}
> +	for (i = 0; i < num_revs; i++) {
> +		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
> +		if (revnum == -1 && PyErr_Occurred()) {
> +			goto bail;
> +		}
> +		revs[i] = revnum;
> +	}

Are we sure revnum is in valid range?

> +
> +	/* Compute and check various property of the unsliced delta chain */
> +	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
> +
> +	if (deltachainspan <= mingapsize) {
> +		result = PyTuple_Pack(1, list_revs);
> +		goto done;
> +	}
> +	chainpayload = 0;
> +	for (i = 0; i < num_revs; i++) {
> +		chainpayload += index_get_length(self, revs[i]);
> +	}
> +
> +	readdata = deltachainspan;
> +	density = 1.0;
> +
> +	if (0 < deltachainspan) {
> +		density = (double)chainpayload / (double)deltachainspan;
> +	};
> +
> +	if (density >= targetdensity) {
> +		result = PyTuple_Pack(1, list_revs);
> +		goto done;
> +	}
> +
> +	/* if chain is too sparse, look for relevant gaps */
> +	gaps = (struct Gap *)malloc((num_revs) * sizeof(struct Gap));

This, too.
> +	if (gaps == NULL) {
> +		PyErr_NoMemory();
> +		goto bail;
> +	}
> +
> +	Py_ssize_t previous_end = -1;
> +	for (i = 0; i < num_revs; i++) {
> +		Py_ssize_t revstart = index_get_start(self, revs[i]);
> +		Py_ssize_t revsize = index_get_length(self, revs[i]);
> +		if (revsize <= 0) {
> +			continue;
> +		}
> +		if (previous_end >= 0) {
> +			Py_ssize_t gapsize = revstart - previous_end;
> +			if (gapsize > mingapsize) {
> +				gaps[num_gaps].size = gapsize;
> +				gaps[num_gaps].idx = i;
> +				num_gaps += 1;
> +			}
> +		}
> +		previous_end = revstart + revsize;
> +	}
> +	if (num_gaps == 0) {
> +		result = PyTuple_Pack(1, list_revs);
> +		goto done;
> +	}
> +	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
> +
> +	/* Slice the largest gap first, they improve the density the most */
> +	selected_indices =
> +	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
> +	if (selected_indices == NULL) {
> +		PyErr_NoMemory();
> +		goto bail;
> +	}
> +
> +	for (i = num_gaps - 1; i >= 0; i--) {
> +		selected_indices[num_selected] = gaps[i].idx;
> +		readdata -= gaps[i].size;
> +		num_selected += 1;
> +		if (readdata <= 0) {
> +			density = 1.0;
> +		} else {
> +			density = (double)chainpayload / (double)readdata;
> +		}
> +		if (density >= targetdensity) {
> +			break;
> +		}
> +	}
> +	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
> +	      &long_compare);

Py_ssize_t != long on Windows, IIRC.

> +	/* create the resulting slice */
> +	long previdx = 0;
> +	selected_indices[num_selected] = num_revs;
> +	for (i = 0; i <= num_selected; i++) {
> +		long idx = selected_indices[i];
> +		chunk = _trimchunk(self, list_revs, previdx, idx);
> +		if (chunk == NULL) {
> +			goto bail;
> +		}
> +		if (PyList_GET_SIZE(chunk) > 0) {
> +			if (PyList_Append(allchunks, chunk) == -1) {
> +				goto bail;
> +			}
> +		}
> +		Py_DECREF(chunk);
> +		chunk = NULL;
> +		previdx = idx;
> +	}
> +	result = allchunks;
> +	goto done;
> +
> +bail:
> +	if (allchunks != NULL) {
> +		Py_DECREF(allchunks);
> +	}
> +	if (chunk != NULL) {
> +		Py_DECREF(chunk);
> +	}

Py_XDECREF() can be used instead of != NULL.

> +done:
> +	if (revs != NULL) {
> +		free(revs);
> +	}
> +	if (gaps != NULL) {
> +		free(gaps);
> +	}
> +	if (selected_indices != NULL) {
> +		free(selected_indices);
> +	}

Nit: free(NULL) is guaranteed to be noop. No need to check != NULL.

> +	if (result != NULL) {
> +		Py_INCREF(result);
> +	}

Looks like an excessive incref as PyTuple_Pack() returns a new reference
for example.

> +	return result;
> +}
Boris FELD - Nov. 13, 2018, 3:35 p.m.
On 12/11/2018 15:50, Yuya Nishihara wrote:
> On Mon, 12 Nov 2018 10:55:47 +0100, Boris Feld wrote:
>> # HG changeset patch
>> # User Boris Feld <boris.feld@octobus.net>
>> # Date 1541785632 -3600
>> #      Fri Nov 09 18:47:12 2018 +0100
>> # Node ID 4a1104eade1dfb1697517d60d2c5fd7a98b8c7f0
>> # Parent  0ea42453fa491793d1e145f5093b65e84cb65e97
>> # EXP-Topic sparse-perf
>> # Available At https://bitbucket.org/octobus/mercurial-devel/
>> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 4a1104eade1d
>> sparse-revlog: introduce native (C) implementation of slicechunktodensity
> Just quickly scanned this patch, no other patches in this series nor "sparse"
> logic is reviewed.
>
>> +struct Gap {
>> +	long size;
>> +	long idx;
>> +};
>> +
>> +static int gap_compare(const void *left, const void *right)
>> +{
>> +	return ((struct Gap *)left)->size - ((struct Gap *)right)->size;
>> +}
>> +static int long_compare(const void *left, const void *right)
>> +{
>> +	return *(long *)left - *(long *)right;
>> +}
> Nit: (const <type> *)<left|right> as the argument is const void*.
>
> If we're sure 'left - right' never exceeds the int range, it might be better
> to explicitly cast the result to (int) to silence possible warning.
>
>> +static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
>> +{
>> +	/* method arguments */
>> +	PyObject *list_revs = NULL; /* revisions in the chain */
>> +	double targetdensity = 0.5; /* min density to achieve */
>> +	long mingapsize = 0;        /* threshold to ignore gaps */
>> +
>> +	/* other core variables */
>> +	long i;                  /* used for various iteration */
>> +	PyObject *result = NULL; /* the final return of the function */
>> +
>> +	/* generic information about the delta chain being slice */
>> +	Py_ssize_t num_revs = 0; /* size of the full delta chain */
>> +	Py_ssize_t *revs = NULL; /* native array of revision in the chain */
>> +	long chainpayload = 0;   /* sum of all delta in the chain */
>> +	long deltachainspan = 0; /* distance from first byte to last byte */
>> +
>> +	/* variable used for slicing the delta chain */
>> +	long readdata = 0;  /* amount of data currently planned to be read */
>> +	double density = 0; /* ration of payload data compared to read ones */
>> +	struct Gap *gaps = NULL; /* array of notable gap in the chain */
>> +	long num_gaps = 0; /* total number of notable gap recorded so far */
>> +	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
>> +	long num_selected = 0;               /* number of gaps skipped */
> Maybe i, num_rgaps, num_selected should be Py_ssize_t? In general, long can
> be narrower than ssize_t.
>
>> +	PyObject *chunk = NULL;              /* individual slice */
>> +	PyObject *allchunks = PyList_New(num_selected); /* all slices */
> Needs to make sure that allchunks isn't NULL.
>
> And it's probably better to just initialize the list with literal 0, since
> we have no for-loop to fill in the list items up to num_selected.
>
>> +	/* parsing argument */
>> +	if (!PyArg_ParseTuple(args, "O!dl", &PyList_Type, &list_revs,
>> +	                      &targetdensity, &mingapsize)) {
>> +		goto bail;
>> +	}
>> +
>> +	/* If the delta chain contains a single element, we do not need slicing
>> +	 */
>> +	num_revs = PyList_GET_SIZE(list_revs);
>> +	if (num_revs <= 1) {
>> +		result = PyTuple_Pack(1, list_revs);
>> +		goto done;
>> +	}
>> +
>> +	/* Turn the python list into a native integer array (for efficiency) */
>> +	revs = (Py_ssize_t *)malloc((num_revs) * sizeof(Py_ssize_t));
> num_revs * sizeof(...) can overflow. Using calloc() is safer if the zeroing
> cost isn't significant.
>
>> +	if (revs == NULL) {
>> +		PyErr_NoMemory();
>> +		goto bail;
>> +	}
>> +	for (i = 0; i < num_revs; i++) {
>> +		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
>> +		if (revnum == -1 && PyErr_Occurred()) {
>> +			goto bail;
>> +		}
>> +		revs[i] = revnum;
>> +	}
> Are we sure revnum is in valid range?
What do you mean ?
>
>> +
>> +	/* Compute and check various property of the unsliced delta chain */
>> +	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
>> +
>> +	if (deltachainspan <= mingapsize) {
>> +		result = PyTuple_Pack(1, list_revs);
>> +		goto done;
>> +	}
>> +	chainpayload = 0;
>> +	for (i = 0; i < num_revs; i++) {
>> +		chainpayload += index_get_length(self, revs[i]);
>> +	}
>> +
>> +	readdata = deltachainspan;
>> +	density = 1.0;
>> +
>> +	if (0 < deltachainspan) {
>> +		density = (double)chainpayload / (double)deltachainspan;
>> +	};
>> +
>> +	if (density >= targetdensity) {
>> +		result = PyTuple_Pack(1, list_revs);
>> +		goto done;
>> +	}
>> +
>> +	/* if chain is too sparse, look for relevant gaps */
>> +	gaps = (struct Gap *)malloc((num_revs) * sizeof(struct Gap));
> This, too.
>> +	if (gaps == NULL) {
>> +		PyErr_NoMemory();
>> +		goto bail;
>> +	}
>> +
>> +	Py_ssize_t previous_end = -1;
>> +	for (i = 0; i < num_revs; i++) {
>> +		Py_ssize_t revstart = index_get_start(self, revs[i]);
>> +		Py_ssize_t revsize = index_get_length(self, revs[i]);
>> +		if (revsize <= 0) {
>> +			continue;
>> +		}
>> +		if (previous_end >= 0) {
>> +			Py_ssize_t gapsize = revstart - previous_end;
>> +			if (gapsize > mingapsize) {
>> +				gaps[num_gaps].size = gapsize;
>> +				gaps[num_gaps].idx = i;
>> +				num_gaps += 1;
>> +			}
>> +		}
>> +		previous_end = revstart + revsize;
>> +	}
>> +	if (num_gaps == 0) {
>> +		result = PyTuple_Pack(1, list_revs);
>> +		goto done;
>> +	}
>> +	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
>> +
>> +	/* Slice the largest gap first, they improve the density the most */
>> +	selected_indices =
>> +	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
>> +	if (selected_indices == NULL) {
>> +		PyErr_NoMemory();
>> +		goto bail;
>> +	}
>> +
>> +	for (i = num_gaps - 1; i >= 0; i--) {
>> +		selected_indices[num_selected] = gaps[i].idx;
>> +		readdata -= gaps[i].size;
>> +		num_selected += 1;
>> +		if (readdata <= 0) {
>> +			density = 1.0;
>> +		} else {
>> +			density = (double)chainpayload / (double)readdata;
>> +		}
>> +		if (density >= targetdensity) {
>> +			break;
>> +		}
>> +	}
>> +	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
>> +	      &long_compare);
> Py_ssize_t != long on Windows, IIRC.
>
>> +	/* create the resulting slice */
>> +	long previdx = 0;
>> +	selected_indices[num_selected] = num_revs;
>> +	for (i = 0; i <= num_selected; i++) {
>> +		long idx = selected_indices[i];
>> +		chunk = _trimchunk(self, list_revs, previdx, idx);
>> +		if (chunk == NULL) {
>> +			goto bail;
>> +		}
>> +		if (PyList_GET_SIZE(chunk) > 0) {
>> +			if (PyList_Append(allchunks, chunk) == -1) {
>> +				goto bail;
>> +			}
>> +		}
>> +		Py_DECREF(chunk);
>> +		chunk = NULL;
>> +		previdx = idx;
>> +	}
>> +	result = allchunks;
>> +	goto done;
>> +
>> +bail:
>> +	if (allchunks != NULL) {
>> +		Py_DECREF(allchunks);
>> +	}
>> +	if (chunk != NULL) {
>> +		Py_DECREF(chunk);
>> +	}
> Py_XDECREF() can be used instead of != NULL.
>
>> +done:
>> +	if (revs != NULL) {
>> +		free(revs);
>> +	}
>> +	if (gaps != NULL) {
>> +		free(gaps);
>> +	}
>> +	if (selected_indices != NULL) {
>> +		free(selected_indices);
>> +	}
> Nit: free(NULL) is guaranteed to be noop. No need to check != NULL.
>
>> +	if (result != NULL) {
>> +		Py_INCREF(result);
>> +	}
> Looks like an excessive incref as PyTuple_Pack() returns a new reference
> for example.
>
>> +	return result;
>> +}
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Yuya Nishihara - Nov. 14, 2018, 12:39 p.m.
On Tue, 13 Nov 2018 16:35:06 +0100, Boris FELD wrote:
> >> +	for (i = 0; i < num_revs; i++) {
> >> +		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
> >> +		if (revnum == -1 && PyErr_Occurred()) {
> >> +			goto bail;
> >> +		}
> >> +		revs[i] = revnum;
> >> +	}
> > Are we sure revnum is in valid range?
> What do you mean ?

IOW, does the later logic makes sure that revs[i] never exceeds the length
of the index (or other data structure)? If it doesn't, we'll have to check
bad revnum values here and error out.
Boris Feld - Nov. 15, 2018, 10:03 a.m.
On 14/11/2018 13:39, Yuya Nishihara wrote:
> On Tue, 13 Nov 2018 16:35:06 +0100, Boris FELD wrote:
>>>> +	for (i = 0; i < num_revs; i++) {
>>>> +		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
>>>> +		if (revnum == -1 && PyErr_Occurred()) {
>>>> +			goto bail;
>>>> +		}
>>>> +		revs[i] = revnum;
>>>> +	}
>>> Are we sure revnum is in valid range?
>> What do you mean ?
> IOW, does the later logic makes sure that revs[i] never exceeds the length
> of the index (or other data structure)? If it doesn't, we'll have to check
> bad revnum values here and error out.
Ha yes good point. In practice they comes from a delta-chain call, but
we better make this part solid.

Patch

diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -11,6 +11,7 @@ 
 #include <assert.h>
 #include <ctype.h>
 #include <stddef.h>
+#include <stdlib.h>
 #include <string.h>
 
 #include "bitmanipulation.h"
@@ -1041,6 +1042,197 @@  static PyObject *_trimchunk(indexObject 
 	return chunk;
 }
 
+struct Gap {
+	long size;
+	long idx;
+};
+
+static int gap_compare(const void *left, const void *right)
+{
+	return ((struct Gap *)left)->size - ((struct Gap *)right)->size;
+}
+static int long_compare(const void *left, const void *right)
+{
+	return *(long *)left - *(long *)right;
+}
+
+static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
+{
+	/* method arguments */
+	PyObject *list_revs = NULL; /* revisions in the chain */
+	double targetdensity = 0.5; /* min density to achieve */
+	long mingapsize = 0;        /* threshold to ignore gaps */
+
+	/* other core variables */
+	long i;                  /* used for various iteration */
+	PyObject *result = NULL; /* the final return of the function */
+
+	/* generic information about the delta chain being slice */
+	Py_ssize_t num_revs = 0; /* size of the full delta chain */
+	Py_ssize_t *revs = NULL; /* native array of revision in the chain */
+	long chainpayload = 0;   /* sum of all delta in the chain */
+	long deltachainspan = 0; /* distance from first byte to last byte */
+
+	/* variable used for slicing the delta chain */
+	long readdata = 0;  /* amount of data currently planned to be read */
+	double density = 0; /* ration of payload data compared to read ones */
+	struct Gap *gaps = NULL; /* array of notable gap in the chain */
+	long num_gaps = 0; /* total number of notable gap recorded so far */
+	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
+	long num_selected = 0;               /* number of gaps skipped */
+	PyObject *chunk = NULL;              /* individual slice */
+	PyObject *allchunks = PyList_New(num_selected); /* all slices */
+
+	/* parsing argument */
+	if (!PyArg_ParseTuple(args, "O!dl", &PyList_Type, &list_revs,
+	                      &targetdensity, &mingapsize)) {
+		goto bail;
+	}
+
+	/* If the delta chain contains a single element, we do not need slicing
+	 */
+	num_revs = PyList_GET_SIZE(list_revs);
+	if (num_revs <= 1) {
+		result = PyTuple_Pack(1, list_revs);
+		goto done;
+	}
+
+	/* Turn the python list into a native integer array (for efficiency) */
+	revs = (Py_ssize_t *)malloc((num_revs) * sizeof(Py_ssize_t));
+	if (revs == NULL) {
+		PyErr_NoMemory();
+		goto bail;
+	}
+	for (i = 0; i < num_revs; i++) {
+		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
+		if (revnum == -1 && PyErr_Occurred()) {
+			goto bail;
+		}
+		revs[i] = revnum;
+	}
+
+	/* Compute and check various property of the unsliced delta chain */
+	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
+
+	if (deltachainspan <= mingapsize) {
+		result = PyTuple_Pack(1, list_revs);
+		goto done;
+	}
+	chainpayload = 0;
+	for (i = 0; i < num_revs; i++) {
+		chainpayload += index_get_length(self, revs[i]);
+	}
+
+	readdata = deltachainspan;
+	density = 1.0;
+
+	if (0 < deltachainspan) {
+		density = (double)chainpayload / (double)deltachainspan;
+	};
+
+	if (density >= targetdensity) {
+		result = PyTuple_Pack(1, list_revs);
+		goto done;
+	}
+
+	/* if chain is too sparse, look for relevant gaps */
+	gaps = (struct Gap *)malloc((num_revs) * sizeof(struct Gap));
+	if (gaps == NULL) {
+		PyErr_NoMemory();
+		goto bail;
+	}
+
+	Py_ssize_t previous_end = -1;
+	for (i = 0; i < num_revs; i++) {
+		Py_ssize_t revstart = index_get_start(self, revs[i]);
+		Py_ssize_t revsize = index_get_length(self, revs[i]);
+		if (revsize <= 0) {
+			continue;
+		}
+		if (previous_end >= 0) {
+			Py_ssize_t gapsize = revstart - previous_end;
+			if (gapsize > mingapsize) {
+				gaps[num_gaps].size = gapsize;
+				gaps[num_gaps].idx = i;
+				num_gaps += 1;
+			}
+		}
+		previous_end = revstart + revsize;
+	}
+	if (num_gaps == 0) {
+		result = PyTuple_Pack(1, list_revs);
+		goto done;
+	}
+	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
+
+	/* Slice the largest gap first, they improve the density the most */
+	selected_indices =
+	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
+	if (selected_indices == NULL) {
+		PyErr_NoMemory();
+		goto bail;
+	}
+
+	for (i = num_gaps - 1; i >= 0; i--) {
+		selected_indices[num_selected] = gaps[i].idx;
+		readdata -= gaps[i].size;
+		num_selected += 1;
+		if (readdata <= 0) {
+			density = 1.0;
+		} else {
+			density = (double)chainpayload / (double)readdata;
+		}
+		if (density >= targetdensity) {
+			break;
+		}
+	}
+	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
+	      &long_compare);
+
+	/* create the resulting slice */
+	long previdx = 0;
+	selected_indices[num_selected] = num_revs;
+	for (i = 0; i <= num_selected; i++) {
+		long idx = selected_indices[i];
+		chunk = _trimchunk(self, list_revs, previdx, idx);
+		if (chunk == NULL) {
+			goto bail;
+		}
+		if (PyList_GET_SIZE(chunk) > 0) {
+			if (PyList_Append(allchunks, chunk) == -1) {
+				goto bail;
+			}
+		}
+		Py_DECREF(chunk);
+		chunk = NULL;
+		previdx = idx;
+	}
+	result = allchunks;
+	goto done;
+
+bail:
+	if (allchunks != NULL) {
+		Py_DECREF(allchunks);
+	}
+	if (chunk != NULL) {
+		Py_DECREF(chunk);
+	}
+done:
+	if (revs != NULL) {
+		free(revs);
+	}
+	if (gaps != NULL) {
+		free(gaps);
+	}
+	if (selected_indices != NULL) {
+		free(selected_indices);
+	}
+	if (result != NULL) {
+		Py_INCREF(result);
+	}
+	return result;
+}
+
 static inline int nt_level(const char *node, Py_ssize_t level)
 {
 	int v = node[level >> 1];
@@ -2265,6 +2457,8 @@  static PyMethodDef index_methods[] = {
      "get filtered head revisions"}, /* Can always do filtering */
     {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
      "determine revisions with deltas to reconstruct fulltext"},
+    {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
+     METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
     {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
     {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
      "match a potentially ambiguous node ID"},