Patchwork [2,of,6,foldmap-in-C] parsers._asciitransform: also accept a fallback function

login
register
mail settings
Submitter Siddharth Agarwal
Date April 3, 2015, 4:02 a.m.
Message ID <e76c0533d7ef1091fa1a.1428033735@devbig136.prn2.facebook.com>
Download mbox | patch
Permalink /patch/8469/
State Accepted
Commit e4a733c34bc64a0c1def98649d04798bf7b2cc2e
Headers show

Comments

Siddharth Agarwal - April 3, 2015, 4:02 a.m.
# HG changeset patch
# User Siddharth Agarwal <sid0@fb.com>
# Date 1427869323 25200
#      Tue Mar 31 23:22:03 2015 -0700
# Node ID e76c0533d7ef1091fa1ac4252f66bbcda3073780
# Parent  a0f05ad01122ba8cd3653af01f4dfd13a839dc02
parsers._asciitransform: also accept a fallback function

This function will be used in upcoming patches to provide a C implementation of
the function to generate the foldmap.
Adrian Buehlmann - April 3, 2015, 6:29 a.m.
On 2015-04-03 06:02, Siddharth Agarwal wrote:
> # HG changeset patch
> # User Siddharth Agarwal <sid0@fb.com>
> # Date 1427869323 25200
> #      Tue Mar 31 23:22:03 2015 -0700
> # Node ID e76c0533d7ef1091fa1ac4252f66bbcda3073780
> # Parent  a0f05ad01122ba8cd3653af01f4dfd13a839dc02
> parsers._asciitransform: also accept a fallback function
> 
> This function will be used in upcoming patches to provide a C implementation of
> the function to generate the foldmap.
> 
> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
> --- a/mercurial/parsers.c
> +++ b/mercurial/parsers.c
> @@ -115,7 +115,8 @@ PyObject *unhexlify(const char *str, int
>  }
>  
>  static inline PyObject *_asciitransform(PyObject *str_obj,
> -					const char table[128])
> +					const char table[128],
> +					PyObject *fallback_fn)
>  {
>  	char *str, *newstr;
>  	Py_ssize_t i, len;
> @@ -134,11 +135,16 @@ static inline PyObject *_asciitransform(
>  	for (i = 0; i < len; i++) {
>  		char c = str[i];
>  		if (c & 0x80) {
> -			PyObject *err = PyUnicodeDecodeError_Create(
> -				"ascii", str, len, i, (i + 1),
> -				"unexpected code byte");
> -			PyErr_SetObject(PyExc_UnicodeDecodeError, err);
> -			Py_XDECREF(err);
> +			if (fallback_fn != NULL) {
> +				ret = PyObject_CallFunctionObjArgs(fallback_fn,
> +					str_obj, NULL);

Shouldn't you quit the for loop here?

If yes: did you actually ever hit this line during your testing? If not,
adding some tests (perhaps doctests?) might be helpful.

> +			} else {
> +				PyObject *err = PyUnicodeDecodeError_Create(
> +					"ascii", str, len, i, (i + 1),
> +					"unexpected code byte");
> +				PyErr_SetObject(PyExc_UnicodeDecodeError, err);
> +				Py_XDECREF(err);
> +			}
>  			goto quit;
>  		}
>  		newstr[i] = table[(unsigned char)c];
> @@ -156,7 +162,7 @@ static PyObject *asciilower(PyObject *se
>  	PyObject *str_obj;
>  	if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
>  		return NULL;
> -	return _asciitransform(str_obj, lowertable);
> +	return _asciitransform(str_obj, lowertable, NULL);
>  }
>  
>  static PyObject *asciiupper(PyObject *self, PyObject *args)
> @@ -164,7 +170,7 @@ static PyObject *asciiupper(PyObject *se
>  	PyObject *str_obj;
>  	if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
>  		return NULL;
> -	return _asciitransform(str_obj, uppertable);
> +	return _asciitransform(str_obj, uppertable, NULL);
>  }
>  
>  /*
Adrian Buehlmann - April 3, 2015, 6:55 a.m.
On 2015-04-03 08:29, Adrian Buehlmann wrote:
> On 2015-04-03 06:02, Siddharth Agarwal wrote:
>> # HG changeset patch
>> # User Siddharth Agarwal <sid0@fb.com>
>> # Date 1427869323 25200
>> #      Tue Mar 31 23:22:03 2015 -0700
>> # Node ID e76c0533d7ef1091fa1ac4252f66bbcda3073780
>> # Parent  a0f05ad01122ba8cd3653af01f4dfd13a839dc02
>> parsers._asciitransform: also accept a fallback function
>>
>> This function will be used in upcoming patches to provide a C implementation of
>> the function to generate the foldmap.
>>
>> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
>> --- a/mercurial/parsers.c
>> +++ b/mercurial/parsers.c
>> @@ -115,7 +115,8 @@ PyObject *unhexlify(const char *str, int
>>  }
>>  
>>  static inline PyObject *_asciitransform(PyObject *str_obj,
>> -					const char table[128])
>> +					const char table[128],
>> +					PyObject *fallback_fn)
>>  {
>>  	char *str, *newstr;
>>  	Py_ssize_t i, len;
>> @@ -134,11 +135,16 @@ static inline PyObject *_asciitransform(
>>  	for (i = 0; i < len; i++) {
>>  		char c = str[i];
>>  		if (c & 0x80) {
>> -			PyObject *err = PyUnicodeDecodeError_Create(
>> -				"ascii", str, len, i, (i + 1),
>> -				"unexpected code byte");
>> -			PyErr_SetObject(PyExc_UnicodeDecodeError, err);
>> -			Py_XDECREF(err);
>> +			if (fallback_fn != NULL) {
>> +				ret = PyObject_CallFunctionObjArgs(fallback_fn,
>> +					str_obj, NULL);
> 
> Shouldn't you quit the for loop here?
> 
> If yes: did you actually ever hit this line during your testing? If not,
> adding some tests (perhaps doctests?) might be helpful.
> 
>> +			} else {
>> +				PyObject *err = PyUnicodeDecodeError_Create(
>> +					"ascii", str, len, i, (i + 1),
>> +					"unexpected code byte");
>> +				PyErr_SetObject(PyExc_UnicodeDecodeError, err);
>> +				Py_XDECREF(err);
>> +			}
>>  			goto quit;

Please forget my question above. The quit is now shared down here.
Everything fine.

>>  		}
>>  		newstr[i] = table[(unsigned char)c];
>> @@ -156,7 +162,7 @@ static PyObject *asciilower(PyObject *se
>>  	PyObject *str_obj;
>>  	if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
>>  		return NULL;
>> -	return _asciitransform(str_obj, lowertable);
>> +	return _asciitransform(str_obj, lowertable, NULL);
>>  }
>>  
>>  static PyObject *asciiupper(PyObject *self, PyObject *args)
>> @@ -164,7 +170,7 @@ static PyObject *asciiupper(PyObject *se
>>  	PyObject *str_obj;
>>  	if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
>>  		return NULL;
>> -	return _asciitransform(str_obj, uppertable);
>> +	return _asciitransform(str_obj, uppertable, NULL);
>>  }
>>  
>>  /*
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel

Patch

diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -115,7 +115,8 @@  PyObject *unhexlify(const char *str, int
 }
 
 static inline PyObject *_asciitransform(PyObject *str_obj,
-					const char table[128])
+					const char table[128],
+					PyObject *fallback_fn)
 {
 	char *str, *newstr;
 	Py_ssize_t i, len;
@@ -134,11 +135,16 @@  static inline PyObject *_asciitransform(
 	for (i = 0; i < len; i++) {
 		char c = str[i];
 		if (c & 0x80) {
-			PyObject *err = PyUnicodeDecodeError_Create(
-				"ascii", str, len, i, (i + 1),
-				"unexpected code byte");
-			PyErr_SetObject(PyExc_UnicodeDecodeError, err);
-			Py_XDECREF(err);
+			if (fallback_fn != NULL) {
+				ret = PyObject_CallFunctionObjArgs(fallback_fn,
+					str_obj, NULL);
+			} else {
+				PyObject *err = PyUnicodeDecodeError_Create(
+					"ascii", str, len, i, (i + 1),
+					"unexpected code byte");
+				PyErr_SetObject(PyExc_UnicodeDecodeError, err);
+				Py_XDECREF(err);
+			}
 			goto quit;
 		}
 		newstr[i] = table[(unsigned char)c];
@@ -156,7 +162,7 @@  static PyObject *asciilower(PyObject *se
 	PyObject *str_obj;
 	if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
 		return NULL;
-	return _asciitransform(str_obj, lowertable);
+	return _asciitransform(str_obj, lowertable, NULL);
 }
 
 static PyObject *asciiupper(PyObject *self, PyObject *args)
@@ -164,7 +170,7 @@  static PyObject *asciiupper(PyObject *se
 	PyObject *str_obj;
 	if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
 		return NULL;
-	return _asciitransform(str_obj, uppertable);
+	return _asciitransform(str_obj, uppertable, NULL);
 }
 
 /*