Patchwork D4118: index: make node tree a Python object

login
register
mail settings
Submitter phabricator
Date Aug. 7, 2018, 6:27 a.m.
Message ID <40b7a57ae16f3df09646c634ab1c8bd9@localhost.localdomain>
Download mbox | patch
Permalink /patch/33387/
State Not Applicable
Headers show

Comments

phabricator - Aug. 7, 2018, 6:27 a.m.
martinvonz updated this revision to Diff 10042.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4118?vs=9983&id=10042

REVISION DETAIL
  https://phab.mercurial-scm.org/D4118

AFFECTED FILES
  mercurial/cext/revlog.c

CHANGE DETAILS




To: martinvonz, #hg-reviewers
Cc: mercurial-devel
Yuya Nishihara - Aug. 7, 2018, 2:02 p.m.
Can you bump the cext version as this patch introduces new API?

> +static int nt_init_py(nodetree *self, PyObject *args)
> +{
> +	PyObject *index;
> +	unsigned capacity;
> +	if (!PyArg_ParseTuple(args, "OI", &index, &capacity))
> +		return -1;

It leaves self->nodes uninitialized on error, and nt_dealloc() would fail if
self->nodes wasn't luckily 0. Strictly speaking, it's too late to initialize
pointers in tp_init because __init__() may be called more than once, but our
C types don't handle such cases.

> +	return nt_init(self, (indexObject*)index, capacity);

We'll probably need INCREF/DECREF business for the index object.

I didn't review the other refcounting thingy carefully. Since it's painful
to do refcounting right, an internal nodetree could be embedded in the
indexObject, and a thin PyObject wrapper could be added. Just an idea.

```
struct nodetree {
};

struct nodetreeObject {
    PyObject_HEAD
    nodetree nt;
};

struct indexObject {
    ...
    nodetree nt;
    ...
};
```
phabricator - Aug. 7, 2018, 2:03 p.m.
yuja added a comment.


  Can you bump the cext version as this patch introduces new API?
  
  > +static int nt_init_py(nodetree *self, PyObject *args)
  >  +{
  >  +	PyObject *index;
  >  +	unsigned capacity;
  >  +	if (!PyArg_ParseTuple(args, "OI", &index, &capacity))
  >  +		return -1;
  
  It leaves self->nodes uninitialized on error, and nt_dealloc() would fail if
  self->nodes wasn't luckily 0. Strictly speaking, it's too late to initialize
  pointers in tp_init because __init__() may be called more than once, but our
  C types don't handle such cases.
  
  > +	return nt_init(self, (indexObject*)index, capacity);
  
  We'll probably need INCREF/DECREF business for the index object.
  
  I didn't review the other refcounting thingy carefully. Since it's painful
  to do refcounting right, an internal nodetree could be embedded in the
  indexObject, and a thin PyObject wrapper could be added. Just an idea.
  
    struct nodetree {
    };
    
    struct nodetreeObject {
        PyObject_HEAD
        nodetree nt;
    };
    
    struct indexObject {
        ...
        nodetree nt;
        ...
    };

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4118

To: martinvonz, #hg-reviewers
Cc: yuja, mercurial-devel
Yuya Nishihara - Aug. 7, 2018, 11:04 p.m.
> +	PyObject *index;
> +	unsigned capacity;
> +	if (!PyArg_ParseTuple(args, "OI", &index, &capacity))
> +		return -1;
> +	return nt_init(self, (indexObject*)index, capacity);

One more thing, we'll probably need to enforce the index type, by `"O!"`.

And a lookup of `0xxx...` hash might not work well due to the nullid entry.
I recalled it while I was making a breakfast. I might be wrong.
phabricator - Aug. 7, 2018, 11:05 p.m.
yuja added a comment.


  > +	PyObject *index;
  >  +	unsigned capacity;
  >  +	if (!PyArg_ParseTuple(args, "OI", &index, &capacity))
  >  +		return -1;
  >  +	return nt_init(self, (indexObject*)index, capacity);
  
  One more thing, we'll probably need to enforce the index type, by `"O!"`.
  
  And a lookup of `0xxx...` hash might not work well due to the nullid entry.
  I recalled it while I was making a breakfast. I might be wrong.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4118

To: martinvonz, #hg-reviewers
Cc: yuja, mercurial-devel

Patch

diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -42,6 +42,7 @@ 
  * Zero is empty
  */
 typedef struct {
+	PyObject_HEAD
 	indexObject *index;
 	nodetreenode *nodes;
 	unsigned length;     /* # nodes in use */
@@ -1084,6 +1085,15 @@ 
 	return 0;
 }
 
+static int nt_init_py(nodetree *self, PyObject *args)
+{
+	PyObject *index;
+	unsigned capacity;
+	if (!PyArg_ParseTuple(args, "OI", &index, &capacity))
+		return -1;
+	return nt_init(self, (indexObject*)index, capacity);
+}
+
 static int nt_partialmatch(nodetree *self, const char *node,
 			   Py_ssize_t nodelen)
 {
@@ -1136,21 +1146,67 @@ 
 	return -3;
 }
 
+static void nt_dealloc(nodetree *self)
+{
+	free(self->nodes);
+	self->nodes = NULL;
+	PyObject_Del(self);
+}
+
+static PyTypeObject nodetreeType = {
+	PyVarObject_HEAD_INIT(NULL, 0) /* header */
+	"parsers.nodetree",        /* tp_name */
+	sizeof(nodetree) ,         /* tp_basicsize */
+	0,                         /* tp_itemsize */
+	(destructor)nt_dealloc,    /* tp_dealloc */
+	0,                         /* tp_print */
+	0,                         /* tp_getattr */
+	0,                         /* tp_setattr */
+	0,                         /* tp_compare */
+	0,                         /* tp_repr */
+	0,                         /* tp_as_number */
+	0,                         /* tp_as_sequence */
+	0,                         /* tp_as_mapping */
+	0,                         /* tp_hash */
+	0,                         /* tp_call */
+	0,                         /* tp_str */
+	0,                         /* tp_getattro */
+	0,                         /* tp_setattro */
+	0,                         /* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT,        /* tp_flags */
+	"nodetree",                /* tp_doc */
+	0,                         /* tp_traverse */
+	0,                         /* tp_clear */
+	0,                         /* tp_richcompare */
+	0,                         /* tp_weaklistoffset */
+	0,                         /* tp_iter */
+	0,                         /* tp_iternext */
+	0,                         /* tp_methods */
+	0,                         /* tp_members */
+	0,                         /* tp_getset */
+	0,                         /* tp_base */
+	0,                         /* tp_dict */
+	0,                         /* tp_descr_get */
+	0,                         /* tp_descr_set */
+	0,                         /* tp_dictoffset */
+	(initproc)nt_init_py,      /* tp_init */
+	0,                         /* tp_alloc */
+};
+
 static int index_init_nt(indexObject *self)
 {
 	if (self->nt == NULL) {
 		if ((size_t)self->raw_length > INT_MAX / sizeof(nodetreenode)) {
 			PyErr_SetString(PyExc_ValueError, "overflow in index_init_nt");
 			return -1;
 		}
-		self->nt = PyMem_Malloc(sizeof(nodetree));
+		self->nt = PyObject_New(nodetree, &nodetreeType);
 		if (self->nt == NULL) {
-			PyErr_NoMemory();
 			return -1;
 		}
 		unsigned capacity = (self->raw_length < 4 ? 4 : (int)self->raw_length / 2);
 		if (nt_init(self->nt, self, capacity) == -1) {
-			PyMem_Free(self->nt);
+			nt_dealloc(self->nt);
 			self->nt = NULL;
 			return -1;
 		}
@@ -2010,8 +2066,7 @@ 
 		self->offsets = NULL;
 	}
 	if (self->nt != NULL) {
-		free(self->nt->nodes);
-		PyMem_Free(self->nt);
+		nt_dealloc(self->nt);
 	}
 	self->nt = NULL;
 	Py_CLEAR(self->headrevs);
@@ -2035,6 +2090,7 @@ 
 	}
 	Py_XDECREF(self->data);
 	Py_XDECREF(self->added);
+	Py_XDECREF(self->nt);
 	PyObject_Del(self);
 }
 
@@ -2184,6 +2240,12 @@ 
 	Py_INCREF(&indexType);
 	PyModule_AddObject(mod, "index", (PyObject *)&indexType);
 
+	nodetreeType.tp_new = PyType_GenericNew;
+	if (PyType_Ready(&nodetreeType) < 0)
+		return;
+	Py_INCREF(&nodetreeType);
+	PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
+
 	nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0,
 				  -1, -1, -1, -1, nullid, 20);
 	if (nullentry)