Patchwork [4,of,5] mpatch: remove dependency on Python.h in mpatch.c

login
register
mail settings
Submitter Maciej Fijalkowski
Date July 25, 2016, 1:10 p.m.
Message ID <8e2d1b2d5248620ff4cc.1469452258@brick.arcode.com>
Download mbox | patch
Permalink /patch/15982/
State Deferred
Headers show

Comments

Maciej Fijalkowski - July 25, 2016, 1:10 p.m.
# HG changeset patch
# User Maciej Fijalkowski <fijall@gmail.com>
# Date 1469201285 -7200
#      Fri Jul 22 17:28:05 2016 +0200
# Node ID 8e2d1b2d5248620ff4ccfcb61e1b383cf0e4e204
# Parent  e7436cb7226d5216b18d25560a14331871b804c0
mpatch: remove dependency on Python.h in mpatch.c

Now all the CPython-related stuff are referenced only from
mpatch_module.c with mpatch.c being freely usable from
a future cffi module
Yuya Nishihara - Aug. 5, 2016, 1:54 p.m.
On Mon, 25 Jul 2016 15:10:58 +0200, Maciej Fijalkowski wrote:
> # HG changeset patch
> # User Maciej Fijalkowski <fijall@gmail.com>
> # Date 1469201285 -7200
> #      Fri Jul 22 17:28:05 2016 +0200
> # Node ID 8e2d1b2d5248620ff4ccfcb61e1b383cf0e4e204
> # Parent  e7436cb7226d5216b18d25560a14331871b804c0
> mpatch: remove dependency on Python.h in mpatch.c

> diff --git a/mercurial/mpatch_module.c b/mercurial/mpatch_module.c
> --- a/mercurial/mpatch_module.c
> +++ b/mercurial/mpatch_module.c
> @@ -33,12 +33,33 @@
>  static char mpatch_doc[] = "Efficient binary patching.";
>  static PyObject *mpatch_Error;
>  
> +struct mpatch_flist *cpygetitem(void *bins, ssize_t pos)
> +{
> +	const char *buffer;
> +	struct mpatch_flist *res;
> +	ssize_t blen;
> +	int r;
> +
> +	PyObject *tmp = PyList_GetItem((PyObject*)bins, pos);
> +	if (!tmp)
> +		return NULL;
> +	if (PyObject_AsCharBuffer(tmp, &buffer, (Py_ssize_t*)&blen))
> +		return NULL;
> +	if ((r = mpatch_decode(buffer, blen, &res)) < 0) {
> +		if (!PyErr_Occurred())
> +			PyErr_SetString(mpatch_Error, mpatch_errors[-r]);

Maybe it should raise MemoryError if r == MPATCH_ERR_NO_MEM.

Other than that, this patch looks good to me.
Maciej Fijalkowski - Aug. 10, 2016, 2:43 p.m.
On Fri, Aug 5, 2016 at 3:54 PM, Yuya Nishihara <yuya@tcha.org> wrote:
> On Mon, 25 Jul 2016 15:10:58 +0200, Maciej Fijalkowski wrote:
>> # HG changeset patch
>> # User Maciej Fijalkowski <fijall@gmail.com>
>> # Date 1469201285 -7200
>> #      Fri Jul 22 17:28:05 2016 +0200
>> # Node ID 8e2d1b2d5248620ff4ccfcb61e1b383cf0e4e204
>> # Parent  e7436cb7226d5216b18d25560a14331871b804c0
>> mpatch: remove dependency on Python.h in mpatch.c
>
>> diff --git a/mercurial/mpatch_module.c b/mercurial/mpatch_module.c
>> --- a/mercurial/mpatch_module.c
>> +++ b/mercurial/mpatch_module.c
>> @@ -33,12 +33,33 @@
>>  static char mpatch_doc[] = "Efficient binary patching.";
>>  static PyObject *mpatch_Error;
>>
>> +struct mpatch_flist *cpygetitem(void *bins, ssize_t pos)
>> +{
>> +     const char *buffer;
>> +     struct mpatch_flist *res;
>> +     ssize_t blen;
>> +     int r;
>> +
>> +     PyObject *tmp = PyList_GetItem((PyObject*)bins, pos);
>> +     if (!tmp)
>> +             return NULL;
>> +     if (PyObject_AsCharBuffer(tmp, &buffer, (Py_ssize_t*)&blen))
>> +             return NULL;
>> +     if ((r = mpatch_decode(buffer, blen, &res)) < 0) {
>> +             if (!PyErr_Occurred())
>> +                     PyErr_SetString(mpatch_Error, mpatch_errors[-r]);
>
> Maybe it should raise MemoryError if r == MPATCH_ERR_NO_MEM.
>
> Other than that, this patch looks good to me.

Maybe it should, but this is replicating the way the C version works I
think (?). My theory is that the versions should not differ much in
terms of behavior, even though guaranteeing sane behavior when you're
out of memory is plain impossible in python
Yuya Nishihara - Aug. 11, 2016, 8:27 a.m.
On Wed, 10 Aug 2016 16:43:46 +0200, Maciej Fijalkowski wrote:
> On Fri, Aug 5, 2016 at 3:54 PM, Yuya Nishihara <yuya@tcha.org> wrote:
> > On Mon, 25 Jul 2016 15:10:58 +0200, Maciej Fijalkowski wrote:
> >> # HG changeset patch
> >> # User Maciej Fijalkowski <fijall@gmail.com>
> >> # Date 1469201285 -7200
> >> #      Fri Jul 22 17:28:05 2016 +0200
> >> # Node ID 8e2d1b2d5248620ff4ccfcb61e1b383cf0e4e204
> >> # Parent  e7436cb7226d5216b18d25560a14331871b804c0
> >> mpatch: remove dependency on Python.h in mpatch.c
> >
> >> diff --git a/mercurial/mpatch_module.c b/mercurial/mpatch_module.c
> >> --- a/mercurial/mpatch_module.c
> >> +++ b/mercurial/mpatch_module.c
> >> @@ -33,12 +33,33 @@
> >>  static char mpatch_doc[] = "Efficient binary patching.";
> >>  static PyObject *mpatch_Error;
> >>
> >> +struct mpatch_flist *cpygetitem(void *bins, ssize_t pos)
> >> +{
> >> +     const char *buffer;
> >> +     struct mpatch_flist *res;
> >> +     ssize_t blen;
> >> +     int r;
> >> +
> >> +     PyObject *tmp = PyList_GetItem((PyObject*)bins, pos);
> >> +     if (!tmp)
> >> +             return NULL;
> >> +     if (PyObject_AsCharBuffer(tmp, &buffer, (Py_ssize_t*)&blen))
> >> +             return NULL;
> >> +     if ((r = mpatch_decode(buffer, blen, &res)) < 0) {
> >> +             if (!PyErr_Occurred())
> >> +                     PyErr_SetString(mpatch_Error, mpatch_errors[-r]);
> >
> > Maybe it should raise MemoryError if r == MPATCH_ERR_NO_MEM.
> >
> > Other than that, this patch looks good to me.
> 
> Maybe it should, but this is replicating the way the C version works I
> think (?). My theory is that the versions should not differ much in
> terms of behavior, even though guaranteeing sane behavior when you're
> out of memory is plain impossible in python

The original implementation would raise MemoryError. I've fixed it in that
way. Also, I think it's generally possible to handle out of memory caused
by one big malloc(), not by a lot of malloc()s of small objects.

Patch

diff --git a/mercurial/mpatch.c b/mercurial/mpatch.c
--- a/mercurial/mpatch.c
+++ b/mercurial/mpatch.c
@@ -20,26 +20,15 @@ 
  of the GNU General Public License, incorporated herein by reference.
 */
 
-#define PY_SSIZE_T_CLEAN
-#include <Python.h>
 #include <stdlib.h>
 #include <string.h>
 
-#include "util.h"
 #include "bitmanipulation.h"
 #include "compat.h"
+#include "mpatch.h"
 
-static char mpatch_doc[] = "Efficient binary patching.";
-static PyObject *mpatch_Error;
-
-struct mpatch_frag {
-	int start, end, len;
-	const char *data;
-};
-
-struct mpatch_flist {
-	struct mpatch_frag *base, *head, *tail;
-};
+char *mpatch_errors[] = {NULL, "invalid patch", "patch cannot be decoded",
+						"no memory"};
 
 struct mpatch_flist *lalloc(ssize_t size)
 {
@@ -56,10 +45,7 @@ 
 			return a;
 		}
 		free(a);
-		a = NULL;
 	}
-	if (!PyErr_Occurred())
-		PyErr_NoMemory();
 	return NULL;
 }
 
@@ -202,7 +188,7 @@ 
 }
 
 /* decode a binary patch into a hunk list */
-struct mpatch_flist *mpatch_decode(const char *bin, ssize_t len)
+int mpatch_decode(const char *bin, ssize_t len, struct mpatch_flist **res)
 {
 	struct mpatch_flist *l;
 	struct mpatch_frag *lt;
@@ -211,7 +197,7 @@ 
 	/* assume worst case size, we won't have many of these lists */
 	l = lalloc(len / 12 + 1);
 	if (!l)
-		return NULL;
+		return MPATCH_ERR_NO_MEM;
 
 	lt = l->tail;
 
@@ -227,14 +213,13 @@ 
 	}
 
 	if (pos != len) {
-		if (!PyErr_Occurred())
-			PyErr_SetString(mpatch_Error, "patch cannot be decoded");
 		mpatch_lfree(l);
-		return NULL;
+		return MPATCH_ERR_CANNOT_BE_DECODED;
 	}
 
 	l->tail = lt;
-	return l;
+	*res = l;
+	return 0;
 }
 
 /* calculate the size of resultant text */
@@ -245,10 +230,7 @@ 
 
 	while (f != l->tail) {
 		if (f->start < last || f->end > len) {
-			if (!PyErr_Occurred())
-				PyErr_SetString(mpatch_Error,
-				                "invalid patch");
-			return -1;
+			return MPATCH_ERR_INVALID_PATCH;
 		}
 		outlen += f->start - last;
 		last = f->end;
@@ -269,10 +251,7 @@ 
 
 	while (f != l->tail) {
 		if (f->start < last || f->end > len) {
-			if (!PyErr_Occurred())
-				PyErr_SetString(mpatch_Error,
-				                "invalid patch");
-			return 0;
+			return MPATCH_ERR_INVALID_PATCH;
 		}
 		memcpy(p, orig + last, f->start - last);
 		p += f->start - last;
@@ -282,29 +261,24 @@ 
 		f++;
 	}
 	memcpy(p, orig + last, len - last);
-	return 1;
+	return 0;
 }
 
 /* recursively generate a patch of all bins between start and end */
-struct mpatch_flist *mpatch_fold(PyObject *bins, ssize_t start,
-	ssize_t end)
+struct mpatch_flist *mpatch_fold(void *bins,
+	struct mpatch_flist* (*get_next_item)(void*, ssize_t),
+	ssize_t start, ssize_t end)
 {
-	ssize_t len, blen;
-	const char *buffer;
+	ssize_t len;
 
 	if (start + 1 == end) {
 		/* trivial case, output a decoded list */
-		PyObject *tmp = PyList_GetItem(bins, start);
-		if (!tmp)
-			return NULL;
-		if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
-			return NULL;
-		return mpatch_decode(buffer, blen);
+		return get_next_item(bins, start);
 	}
 
 	/* divide and conquer, memory management is elsewhere */
 	len = (end - start) / 2;
-	return combine(mpatch_fold(bins, start, start + len),
-		       mpatch_fold(bins, start + len, end));
+	return combine(mpatch_fold(bins, get_next_item, start, start + len),
+		       mpatch_fold(bins, get_next_item, start + len, end));
 }
 
diff --git a/mercurial/mpatch.h b/mercurial/mpatch.h
--- a/mercurial/mpatch.h
+++ b/mercurial/mpatch.h
@@ -1,6 +1,12 @@ 
 #ifndef _HG_MPATCH_H_
 #define _HG_MPATCH_H_
 
+extern char *mpatch_errors[];
+
+#define MPATCH_ERR_NO_MEM -3
+#define MPATCH_ERR_CANNOT_BE_DECODED -2
+#define MPATCH_ERR_INVALID_PATCH -1
+
 struct mpatch_frag {
 	int start, end, len;
 	const char *data;
@@ -15,6 +21,8 @@ 
 void mpatch_lfree(struct mpatch_flist *a);
 int mpatch_apply(char *buf, const char *orig, ssize_t len,
 	struct mpatch_flist *l);
-struct mpatch_flist *mpatch_fold(void *bins, ssize_t start, ssize_t end);
+struct mpatch_flist *mpatch_fold(void *bins,
+	struct mpatch_flist* (*get_next_item)(void*, ssize_t),
+	ssize_t start, ssize_t end);
 
 #endif
diff --git a/mercurial/mpatch_module.c b/mercurial/mpatch_module.c
--- a/mercurial/mpatch_module.c
+++ b/mercurial/mpatch_module.c
@@ -33,12 +33,33 @@ 
 static char mpatch_doc[] = "Efficient binary patching.";
 static PyObject *mpatch_Error;
 
+struct mpatch_flist *cpygetitem(void *bins, ssize_t pos)
+{
+	const char *buffer;
+	struct mpatch_flist *res;
+	ssize_t blen;
+	int r;
+
+	PyObject *tmp = PyList_GetItem((PyObject*)bins, pos);
+	if (!tmp)
+		return NULL;
+	if (PyObject_AsCharBuffer(tmp, &buffer, (Py_ssize_t*)&blen))
+		return NULL;
+	if ((r = mpatch_decode(buffer, blen, &res)) < 0) {
+		if (!PyErr_Occurred())
+			PyErr_SetString(mpatch_Error, mpatch_errors[-r]);
+ 		return NULL;
+	}
+	return res;
+}
+
 static PyObject *
 patches(PyObject *self, PyObject *args)
 {
 	PyObject *text, *bins, *result;
 	struct mpatch_flist *patch;
 	const char *in;
+	int r;
 	char *out;
 	Py_ssize_t len, outlen, inlen;
 
@@ -55,12 +76,16 @@ 
 	if (PyObject_AsCharBuffer(text, &in, &inlen))
 		return NULL;
 
-	patch = mpatch_fold(bins, 0, len);
-	if (!patch)
+	patch = mpatch_fold(bins, cpygetitem, 0, len);
+	if (!patch) { /* error already set or memory error */
+		if (!PyErr_Occurred())
+			PyErr_NoMemory();
 		return NULL;
+	}
 
 	outlen = mpatch_calcsize(inlen, patch);
 	if (outlen < 0) {
+		r = (int)outlen;
 		result = NULL;
 		goto cleanup;
 	}
@@ -70,12 +95,14 @@ 
 		goto cleanup;
 	}
 	out = PyBytes_AsString(result);
-	if (!mpatch_apply(out, in, inlen, patch)) {
+	if ((r = mpatch_apply(out, in, inlen, patch)) < 0) {
 		Py_DECREF(result);
 		result = NULL;
 	}
 cleanup:
 	mpatch_lfree(patch);
+	if (!result && !PyErr_Occurred())
+		PyErr_SetString(mpatch_Error, mpatch_errors[-r]);
 	return result;
 }