@@ -9,6 +9,7 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
+#include <assert.h>
#include "charencode.h"
#include "util.h"
@@ -63,6 +64,42 @@ static const char uppertable[128] = {
'\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
};
+/* Number of output bytes each input byte occupies once JSON-escaped:
+ * 1: no escape, 2: \<c>, 6: \u<x>
+ * jsonlentable has an entry for every byte value; 0x80-0xff are all 1.
+ * jsonparanoidlentable has entries for 0x00-0x7f only and additionally
+ * gives '<' and '>' a length of 6, so it must never be indexed with a
+ * byte that has the high bit set. */
+static const uint8_t jsonlentable[256] = {
+ 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+};
+
+static const uint8_t jsonparanoidlentable[128] = {
+ 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 6, 1, /* <, > */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
+};
+
+static const char hexchartable[16] = { /* lowercase hex digit for each 4-bit value, indexed 0..15 */
+ '0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
+};
+
/*
* Turn a hex-encoded string into binary.
*/
@@ -217,3 +254,105 @@ quit:
Py_XDECREF(file_foldmap);
return NULL;
}
+
+/*
+ * Calculate the length of the JSON-escaped form of buf[0..len).
+ *
+ * Every input byte contributes 1, 2 or 6 output bytes, looked up in
+ * jsonparanoidlentable when 'paranoid' is true and in jsonlentable
+ * otherwise.
+ *
+ * Returns the escaped length, or -1 with a Python exception set:
+ *  - ValueError if 'paranoid' is true and a byte has the high bit set
+ *    (multi-byte escapes are not processed in C);
+ *  - MemoryError if the escaped length does not fit in Py_ssize_t.
+ */
+static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid)
+{
+	Py_ssize_t i, esclen = 0;
+
+	if (paranoid) {
+		/* don't want to process multi-byte escapes in C */
+		for (i = 0; i < len; i++) {
+			char c = buf[i];
+			uint8_t l;
+			if (c & 0x80) {
+				PyErr_SetString(PyExc_ValueError,
+				                "cannot process non-ascii str");
+				return -1;
+			}
+			l = jsonparanoidlentable[(unsigned char)c];
+			/* test before adding: signed overflow is undefined */
+			if (esclen > PY_SSIZE_T_MAX - l) {
+				PyErr_SetString(PyExc_MemoryError,
+				                "overflow in jsonescapelen");
+				return -1;
+			}
+			esclen += l;
+		}
+	} else {
+		for (i = 0; i < len; i++) {
+			uint8_t l = jsonlentable[(unsigned char)buf[i]];
+			/* test before adding: signed overflow is undefined */
+			if (esclen > PY_SSIZE_T_MAX - l) {
+				PyErr_SetString(PyExc_MemoryError,
+				                "overflow in jsonescapelen");
+				return -1;
+			}
+			esclen += l;
+		}
+	}
+
+	return esclen;
+}
+
+/*
+ * Return the letter that follows the backslash in the two-character JSON
+ * escape for 'c' (backspace, tab, newline, form feed, carriage return,
+ * double quote or backslash). Any other input yields '\0'; callers are
+ * expected to pass only bytes whose table length is 2.
+ */
+static char jsonescapechar2(char c)
+{
+	static const char raw[] = {'\b', '\t', '\n', '\f', '\r', '"', '\\'};
+	static const char letter[] = {'b', 't', 'n', 'f', 'r', '"', '\\'};
+	size_t k;
+
+	for (k = 0; k < sizeof(raw); k++) {
+		if (raw[k] == c)
+			return letter[k];
+	}
+	return '\0'; /* should not happen */
+}
+
+/*
+ * Write the JSON-escaped form of origbuf[0..origlen) into escbuf.
+ *
+ * 'esclen' is the capacity of escbuf and is only consulted by the assert.
+ * 'origbuf' should only include characters mappable by the table selected
+ * through 'paranoid' (jsonparanoidlentable or jsonlentable). No terminating
+ * NUL is written.
+ */
+static void encodejsonescape(char *escbuf, Py_ssize_t esclen,
+                             const char *origbuf, Py_ssize_t origlen,
+                             bool paranoid)
+{
+	const uint8_t *widths = paranoid ? jsonparanoidlentable : jsonlentable;
+	Py_ssize_t src, dst = 0;
+
+	for (src = 0; src < origlen; src++) {
+		const char ch = origbuf[src];
+		const unsigned char byte = (unsigned char)ch;
+		const uint8_t width = widths[byte];
+		char *out = escbuf + dst;
+
+		assert(dst + width <= esclen);
+		if (width == 1) {
+			/* plain byte: copied through unchanged */
+			out[0] = ch;
+		} else if (width == 2) {
+			/* short escape: backslash plus one letter */
+			out[0] = '\\';
+			out[1] = jsonescapechar2(ch);
+		} else if (width == 6) {
+			/* \u00XX: the high two hex digits are always zero */
+			memcpy(out, "\\u00", 4);
+			out[4] = hexchartable[byte >> 4];
+			out[5] = hexchartable[byte & 0xf];
+		}
+		dst += width;
+	}
+}
+
+/*
+ * Python entry point: jsonescapeu8fast(bytes, paranoid) -> bytes
+ *
+ * JSON-escapes a bytes object. 'paranoid' is parsed as a C int and used as
+ * a boolean that selects jsonparanoidlentable instead of jsonlentable.
+ *
+ * Returns the argument itself (with a new reference) when the escaped
+ * length equals the original length, i.e. nothing needs escaping;
+ * otherwise returns a newly allocated bytes object. Returns NULL with an
+ * exception set if argument parsing fails, if jsonescapelen() rejects the
+ * input, or if the result cannot be allocated.
+ */
+PyObject *jsonescapeu8fast(PyObject *self, PyObject *args)
+{
+	PyObject *origstr, *escstr;
+	const char *origbuf;
+	Py_ssize_t origlen, esclen;
+	int paranoid;
+	if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast",
+	                      &PyBytes_Type, &origstr, &paranoid))
+		return NULL;
+
+	origbuf = PyBytes_AS_STRING(origstr);
+	origlen = PyBytes_GET_SIZE(origstr);
+	esclen = jsonescapelen(origbuf, origlen, paranoid);
+	if (esclen < 0)
+		return NULL; /* unsupported char found */
+	if (origlen == esclen) {
+		/* every byte maps to itself: hand back the input unchanged */
+		Py_INCREF(origstr);
+		return origstr;
+	}
+
+	escstr = PyBytes_FromStringAndSize(NULL, esclen);
+	if (!escstr)
+		return NULL;
+	encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen,
+	                 paranoid);
+
+	return escstr;
+}
@@ -22,6 +22,7 @@ PyObject *unhexlify(const char *str, Py_
PyObject *asciilower(PyObject *self, PyObject *args);
PyObject *asciiupper(PyObject *self, PyObject *args);
PyObject *make_file_foldmap(PyObject *self, PyObject *args);
+PyObject *jsonescapeu8fast(PyObject *self, PyObject *args);
static const int8_t hextable[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
@@ -702,6 +702,8 @@ static PyMethodDef methods[] = {
"construct a dict with an expected size\n"},
{"make_file_foldmap", make_file_foldmap, METH_VARARGS,
"make file foldmap\n"},
+ {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
+ "escape a UTF-8 byte string to JSON (fast path)\n"},
{"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
{"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
{"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
@@ -714,7 +716,7 @@ void dirs_module_init(PyObject *mod);
void manifest_module_init(PyObject *mod);
void revlog_module_init(PyObject *mod);
-static const int version = 1;
+static const int version = 2; /* NOTE(review): the Python-side (cext, parsers) version entry is set to 2 as well; presumably the two must match - confirm */
static void module_init(PyObject *mod)
{
@@ -26,7 +26,7 @@ charencode = policy.importmod(r'charenco
asciilower = charencode.asciilower
asciiupper = charencode.asciiupper
-_jsonescapeu8fast = charencodepure.jsonescapeu8fast # TODO: no "pure"
+_jsonescapeu8fast = charencode.jsonescapeu8fast
_sysstr = pycompat.sysstr
@@ -404,8 +404,8 @@ def jsonescape(s, paranoid=False):
'this is a test'
>>> jsonescape('escape characters: \\0 \\x0b \\x7f')
'escape characters: \\\\u0000 \\\\u000b \\\\u007f'
- >>> jsonescape('escape characters: \\t \\n \\r \\" \\\\')
- 'escape characters: \\\\t \\\\n \\\\r \\\\" \\\\\\\\'
+ >>> jsonescape('escape characters: \\b \\t \\n \\f \\r \\" \\\\')
+ 'escape characters: \\\\b \\\\t \\\\n \\\\f \\\\r \\\\" \\\\\\\\'
>>> jsonescape('a weird byte: \\xdd')
'a weird byte: \\xed\\xb3\\x9d'
>>> jsonescape('utf-8: caf\\xc3\\xa9')
@@ -416,6 +416,10 @@ def jsonescape(s, paranoid=False):
If paranoid, non-ascii and common troublesome characters are also escaped.
This is suitable for web output.
+ >>> s = 'escape characters: \\0 \\x0b \\x7f'
+ >>> assert jsonescape(s) == jsonescape(s, paranoid=True)
+ >>> s = 'escape characters: \\b \\t \\n \\f \\r \\" \\\\'
+ >>> assert jsonescape(s) == jsonescape(s, paranoid=True)
>>> jsonescape('escape boundary: \\x7e \\x7f \\xc2\\x80', paranoid=True)
'escape boundary: ~ \\\\u007f \\\\u0080'
>>> jsonescape('a weird byte: \\xdd', paranoid=True)
@@ -75,7 +75,7 @@ def _importfrom(pkgname, modname):
(r'cext', r'diffhelpers'): 1,
(r'cext', r'mpatch'): 1,
(r'cext', r'osutil'): 1,
- (r'cext', r'parsers'): 1,
+ (r'cext', r'parsers'): 2,
}
# map import request to other package or module