Patchwork [2,of,7,V4] sparse-revlog: add a `index_get_length` function in C

login
register
mail settings
Submitter Boris Feld
Date Nov. 20, 2018, 8:44 p.m.
Message ID <8863dc1f7d078827708d.1542746673@localhost.localdomain>
Download mbox | patch
Permalink /patch/36679/
State Superseded
Headers show

Comments

Boris Feld - Nov. 20, 2018, 8:44 p.m.
# HG changeset patch
# User Boris Feld <boris.feld@octobus.net>
# Date 1541785378 -3600
#      Fri Nov 09 18:42:58 2018 +0100
# Node ID 8863dc1f7d078827708d178bbbfee5519b4c9b0f
# Parent  60a55da39befa4996c1a88ca6f663765ea143ef2
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 8863dc1f7d07
sparse-revlog: add a `index_get_length` function in C

We are about to implement a native version of `slicechunktodensity`. For
clarity, we introduce the helper functions first. This new function provides
an efficient way to retrieve some of the information needed by
`slicechunktodensity`.
Yuya Nishihara - Nov. 21, 2018, 12:32 p.m.
On Tue, 20 Nov 2018 21:44:33 +0100, Boris Feld wrote:
> # HG changeset patch
> # User Boris Feld <boris.feld@octobus.net>
> # Date 1541785378 -3600
> #      Fri Nov 09 18:42:58 2018 +0100
> # Node ID 8863dc1f7d078827708d178bbbfee5519b4c9b0f
> # Parent  60a55da39befa4996c1a88ca6f663765ea143ef2
> # EXP-Topic sparse-perf
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 8863dc1f7d07
> sparse-revlog: add a `index_get_length` function in C
> 
> We are about to implement a native version of `slicechunktodensity`. For
> clarity, we introduce the helper functions first. This new function provides
> an efficient way to retrieve some of the information needed by
> `slicechunktodensity`.
> 
> diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
> --- a/mercurial/cext/revlog.c
> +++ b/mercurial/cext/revlog.c
> @@ -225,6 +225,44 @@ static inline int64_t index_get_start(in
>  	return (int64_t)(offset >> 16);
>  }
>  
> +static inline int index_get_length(indexObject *self, Py_ssize_t rev)
> +{
> +	if (rev >= self->length) {
> +		PyObject *tuple;
> +		PyObject *pylong;
> +		unsigned PY_LONG_LONG tmp;
> +		tuple = PyList_GET_ITEM(self->added, rev - self->length);
> +		pylong = PyTuple_GET_ITEM(tuple, 1);
> +#ifndef IS_PY3K
> +		if (PyInt_Check(pylong)) {
> +			long tmp2 = PyInt_AsLong(pylong);
> +			if (tmp2 < 0) {
> +				return -1;

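Nit: no exception would be set if the value was actually -1 (or any other negative number), so the caller would see an error return with no Python exception pending.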

> +			}
> +			tmp = (unsigned PY_LONG_LONG)tmp2;
> +		} else {
> +#endif
> +			tmp = PyLong_AsUnsignedLongLong(pylong);
> +			if (tmp == (unsigned PY_LONG_LONG) - 1) {
> +				return -1;
> +			}

If I understand correctly, we can simply use PyInt_AsLong() here, since a valid
length should fit in the int range. PyInt_AsLong is aliased to PyLong_AsLong on Python 3.

To be clear, what I have in mind is something like this:

  long ret = PyInt_AsLong(PyTuple_GET_ITEM(tuple, 1));
  if (ret == -1 && PyErr_Occurred())
      return -1;
  if (ret < 0 || ret > (long)INT_MAX)
      set overflow error and return -1;

> +		if (tmp > INT_MAX) {
> +			PyErr_Format(PyExc_OverflowError,
> +			             "revlog entry size too large (%llu)",
> +			             (long long)tmp);
> +			return -1;
> +		}
> +		return (int)tmp;

Patch

diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -225,6 +225,44 @@  static inline int64_t index_get_start(in
 	return (int64_t)(offset >> 16);
 }
 
+static inline int index_get_length(indexObject *self, Py_ssize_t rev)
+{
+	if (rev >= self->length) {
+		PyObject *tuple;
+		PyObject *pylong;
+		unsigned PY_LONG_LONG tmp;
+		tuple = PyList_GET_ITEM(self->added, rev - self->length);
+		pylong = PyTuple_GET_ITEM(tuple, 1);
+#ifndef IS_PY3K
+		if (PyInt_Check(pylong)) {
+			long tmp2 = PyInt_AsLong(pylong);
+			if (tmp2 < 0) {
+				return -1;
+			}
+			tmp = (unsigned PY_LONG_LONG)tmp2;
+		} else {
+#endif
+			tmp = PyLong_AsUnsignedLongLong(pylong);
+			if (tmp == (unsigned PY_LONG_LONG) - 1) {
+				return -1;
+			}
+
+#ifndef IS_PY3K
+		}
+#endif
+		if (tmp > INT_MAX) {
+			PyErr_Format(PyExc_OverflowError,
+			             "revlog entry size too large (%llu)",
+			             (long long)tmp);
+			return -1;
+		}
+		return (int)tmp;
+	} else {
+		const char *data = index_deref(self, rev);
+		return (int)getbe32(data + 8);
+	}
+}
+
 /*
  * RevlogNG format (all in big endian, data may be inlined):
  *    6 bytes: offset