Patchwork D1769: cext: use dedicated type for index entries

login
register
mail settings
Submitter phabricator
Date Dec. 27, 2017, 12:36 a.m.
Message ID <differential-rev-PHID-DREV-zn6vmzq5ocqy5xuvvdss-req@phab.mercurial-scm.org>
Download mbox | patch
Permalink /patch/26456/
State New
Headers show

Comments

phabricator - Dec. 27, 2017, 12:36 a.m.
indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Now that we have a handle on our type to represent revlog index
  entries, let's use it.
  
  This commit essentially consists of porting code from PyTuple to
  PyObject.
  
  As part of porting the code, we now retrieve elements in the entry
  type by named attribute instead of integer index.
  
  Before, PyTuple_* APIs allowed us to retrieve a borrowed reference
  to PyObject elements. We can no longer do this via the PyObject_*
  APIs for a type not implemented in C. This required extra refcount
  manipulation in various places.
  
  The C extension is now emitting the new type. And because various
  code is still accessing index entry elements via __getitem__, we
  see a performance regression in the Firefox repository:
  
  $ hg perfrevset '::tip'
  ! wall 0.672636 comb 0.670000 user 0.650000 sys 0.020000 (best of 15)
  ! wall 0.962372 comb 0.960000 user 0.940000 sys 0.020000 (best of 10)
  
  We will claw back this regression in subsequent commits by accessing
  fields by name instead of index.
  
  The version of the "parsers" C extension has been incremented to
  reflect the backwards incompatible API change.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1769

AFFECTED FILES
  mercurial/cext/parsers.c
  mercurial/cext/revlog.c
  mercurial/policy.py

CHANGE DETAILS




To: indygreg, #hg-reviewers
Cc: mercurial-devel
phabricator - Jan. 5, 2018, 5:22 a.m.
yuja requested changes to this revision.
yuja added inline comments.
This revision now requires changes to proceed.

INLINE COMMENTS

> revlog.c:141
> +		if (!value) {
> +			PyErr_SetString(PyExc_ValueError,
> +					"could not retrieve p1rev");

Perhaps AttributeError would be set by `PyObject_GetAttrString()`.

> revlog.c:321
> +		   needs to decrease its refcount. */
> +		obj = tmpobj;
>  	}

This is tricky, but looks okay. Alternatively, we could inc/decref borrowed `obj`.

> revlog.c:327
> +		result = NULL;
> +		goto cleanup;
> +	}

I slightly prefer `goto bail` instead of `result = NULL; goto cleanup` pairs.

> revlog.c:1788
> +			PyErr_SetString(PyExc_ValueError, "could not get node");
> +			return;
> +		}

No error reported to the caller.

AttributeError would be set automatically.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1769

To: indygreg, #hg-reviewers, yuja
Cc: yuja, mercurial-devel

Patch

diff --git a/mercurial/policy.py b/mercurial/policy.py
--- a/mercurial/policy.py
+++ b/mercurial/policy.py
@@ -75,7 +75,7 @@ 
     (r'cext', r'diffhelpers'): 1,
     (r'cext', r'mpatch'): 1,
     (r'cext', r'osutil'): 2,
-    (r'cext', r'parsers'): 4,
+    (r'cext', r'parsers'): 5,
 }
 
 # map import request to other package or module
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -133,10 +133,26 @@ 
 				    int *ps, int maxrev)
 {
 	if (rev >= self->length - 1) {
-		PyObject *tuple = PyList_GET_ITEM(self->added,
+		PyObject *value;
+		PyObject *entry = PyList_GET_ITEM(self->added,
 						  rev - self->length + 1);
-		ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
-		ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
+		value = PyObject_GetAttrString(entry, "p1rev");
+		if (!value) {
+			PyErr_SetString(PyExc_ValueError,
+					"could not retrieve p1rev");
+			return -1;
+		}
+		ps[0] = (int)PyInt_AS_LONG(value);
+		Py_DECREF(value);
+
+		value = PyObject_GetAttrString(entry, "p2rev");
+		if (!value) {
+			PyErr_SetString(PyExc_ValueError,
+					"could not retrieve p2rev");
+			return -1;
+		}
+		ps[1] = (int)PyInt_AS_LONG(value);
+		Py_DECREF(value);
 	} else {
 		const char *data = index_deref(self, rev);
 		ps[0] = getbe32(data + 24);
@@ -224,9 +240,9 @@ 
 	parent_2 = getbe32(data + 28);
 	c_node_id = data + 32;
 
-	entry = Py_BuildValue(index_entry_format, offset_flags, comp_len,
-			      uncomp_len, base_rev, link_rev,
-			      parent_1, parent_2, c_node_id, 20);
+	entry = PyObject_CallFunction(self->entrytype, index_entry_format,
+			    offset_flags, comp_len, uncomp_len, base_rev,
+			    link_rev, parent_1, parent_2, c_node_id, 20);
 
 	if (entry) {
 		PyObject_GC_UnTrack(entry);
@@ -253,10 +269,16 @@ 
 		return NULL;
 
 	if (pos >= self->length - 1) {
-		PyObject *tuple, *str;
-		tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
-		str = PyTuple_GetItem(tuple, 7);
-		return str ? PyBytes_AS_STRING(str) : NULL;
+		PyObject *entry, *str;
+		char *result;
+		entry = PyList_GET_ITEM(self->added, pos - self->length + 1);
+		str = PyObject_GetAttrString(entry, "node");
+		if (!str) {
+			return NULL;
+		}
+		result = PyBytes_AS_STRING(str);
+		Py_DECREF(str);
+		return result;
 	}
 
 	data = index_deref(self, pos);
@@ -277,21 +299,48 @@ 
 
 static PyObject *index_insert(indexObject *self, PyObject *args)
 {
-	PyObject *obj;
+	PyObject *obj, *nodeobj, *result;
+	PyObject *tmpobj = NULL;
 	char *node;
 	int index;
 	Py_ssize_t len, nodelen;
 
 	if (!PyArg_ParseTuple(args, "iO", &index, &obj))
 		return NULL;
 
-	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
-		PyErr_SetString(PyExc_TypeError, "8-tuple required");
-		return NULL;
+	/* Legacy callers may pass tuples. Convert to an index entry until
+	 * API compatibility is dropped. */
+	if (PyTuple_Check(obj)) {
+		tmpobj = PyObject_Call(self->entrytype, obj, NULL);
+		if (!tmpobj) {
+			return NULL;
+		}
+
+		/* We own a reference to tmpobj. So any return after here
+		   needs to decrease its refcount. */
+		obj = tmpobj;
 	}
 
-	if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
-		return NULL;
+	if (!PyObject_TypeCheck(obj, (PyTypeObject *)self->entrytype)) {
+		PyErr_SetString(PyExc_TypeError, "IndexV1Entry type required");
+		result = NULL;
+		goto cleanup;
+	}
+
+	nodeobj = PyObject_GetAttrString(obj, "node");
+	if (!nodeobj) {
+		PyErr_SetString(PyExc_ValueError, "could not retrieve node");
+		result = NULL;
+		goto cleanup;
+	}
+	if (node_check(nodeobj, &node, &nodelen) == -1) {
+		Py_DECREF(nodeobj);
+		result = NULL;
+		goto cleanup;
+	}
+
+	Py_DECREF(nodeobj);
+	nodeobj = NULL;
 
 	len = index_length(self);
 
@@ -301,23 +350,33 @@ 
 	if (index != len - 1) {
 		PyErr_SetString(PyExc_IndexError,
 				"insert only supported at index -1");
-		return NULL;
+		result = NULL;
+		goto cleanup;
 	}
 
 	if (self->added == NULL) {
 		self->added = PyList_New(0);
-		if (self->added == NULL)
-			return NULL;
+		if (self->added == NULL) {
+			result = NULL;
+			goto cleanup;
+		}
 	}
 
-	if (PyList_Append(self->added, obj) == -1)
-		return NULL;
+	if (PyList_Append(self->added, obj) == -1) {
+		result = NULL;
+		goto cleanup;
+	}
 
 	if (self->nt)
 		nt_insert(self, node, index);
 
 	Py_CLEAR(self->headrevs);
-	Py_RETURN_NONE;
+	result = Py_None;
+	Py_INCREF(Py_None);
+
+cleanup:
+	Py_XDECREF(tmpobj);
+	return result;
 }
 
 static void _index_clearcaches(indexObject *self)
@@ -837,9 +896,19 @@ 
 	const char *data;
 
 	if (rev >= self->length - 1) {
-		PyObject *tuple = PyList_GET_ITEM(self->added,
+		PyObject *value;
+		int result;
+		PyObject *entry = PyList_GET_ITEM(self->added,
 			rev - self->length + 1);
-		return (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 3));
+		value = PyObject_GetAttrString(entry, "baserev");
+		if (!value) {
+			PyErr_SetString(PyExc_ValueError,
+					"could not obtain baserev");
+			return -2;
+		}
+		result = (int)PyInt_AS_LONG(value);
+		Py_DECREF(value);
+		return result;
 	}
 	else {
 		data = index_deref(self, rev);
@@ -1712,10 +1781,15 @@ 
 	Py_ssize_t i, len = PyList_GET_SIZE(self->added);
 
 	for (i = start; i < len; i++) {
-		PyObject *tuple = PyList_GET_ITEM(self->added, i);
-		PyObject *node = PyTuple_GET_ITEM(tuple, 7);
+		PyObject *entry = PyList_GET_ITEM(self->added, i);
+		PyObject *node = PyObject_GetAttrString(entry, "node");
+		if (!node) {
+			PyErr_SetString(PyExc_ValueError, "could not get node");
+			return;
+		}
 
 		nt_insert(self, PyBytes_AS_STRING(node), -1);
+		Py_DECREF(node);
 	}
 
 	if (start == 0)
@@ -1895,8 +1969,9 @@ 
 	}
 
 	/* Initialize before argument-checking to avoid index_dealloc() crash. */
-	self->nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
-					-1, -1, -1, -1, nullid, 20);
+	self->nullentry = PyObject_CallFunction(self->entrytype, "iiiiiiis#",
+						0, 0, 0, -1, -1, -1, -1,
+						nullid, 20);
 	if (!self->nullentry) {
 		return -1;
 	}
diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c
--- a/mercurial/cext/parsers.c
+++ b/mercurial/cext/parsers.c
@@ -710,7 +710,7 @@ 
 void manifest_module_init(PyObject *mod);
 void revlog_module_init(PyObject *mod);
 
-static const int version = 4;
+static const int version = 5;
 
 static void module_init(PyObject *mod)
 {