Patchwork D1973: bdiff: write a native version of splitnewlines

login
register
mail settings
Submitter phabricator
Date Feb. 2, 2018, 4:26 p.m.
Message ID <5c84588385030944c5c10ebfcdb3677b@localhost.localdomain>
Download mbox | patch
Permalink /patch/27236/
State Not Applicable
Headers show

Comments

phabricator - Feb. 2, 2018, 4:26 p.m.
durin42 updated this revision to Diff 5160.
durin42 marked 7 inline comments as done.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D1973?vs=5086&id=5160

REVISION DETAIL
  https://phab.mercurial-scm.org/D1973

AFFECTED FILES
  mercurial/cext/bdiff.c
  mercurial/mdiff.py
  mercurial/policy.py
  mercurial/pure/bdiff.py

CHANGE DETAILS




To: durin42, #hg-reviewers, indygreg
Cc: indygreg, yuja, mercurial-devel

Patch

diff --git a/mercurial/pure/bdiff.py b/mercurial/pure/bdiff.py
--- a/mercurial/pure/bdiff.py
+++ b/mercurial/pure/bdiff.py
@@ -90,3 +90,13 @@ 
         text = re.sub('[ \t\r]+', ' ', text)
         text = text.replace(' \n', '\n')
     return text
+
+def splitnewlines(text):
+    '''like str.splitlines, but only split on newlines.'''
+    lines = [l + '\n' for l in text.split('\n')]
+    if lines:
+        if lines[-1] == '\n':
+            lines.pop()
+        else:
+            lines[-1] = lines[-1][:-1]
+    return lines
diff --git a/mercurial/policy.py b/mercurial/policy.py
--- a/mercurial/policy.py
+++ b/mercurial/policy.py
@@ -71,7 +71,7 @@ 
 # keep in sync with "version" in C modules
 _cextversions = {
     (r'cext', r'base85'): 1,
-    (r'cext', r'bdiff'): 1,
+    (r'cext', r'bdiff'): 2,
     (r'cext', r'diffhelpers'): 1,
     (r'cext', r'mpatch'): 1,
     (r'cext', r'osutil'): 3,
diff --git a/mercurial/mdiff.py b/mercurial/mdiff.py
--- a/mercurial/mdiff.py
+++ b/mercurial/mdiff.py
@@ -29,16 +29,7 @@ 
 patches = mpatch.patches
 patchedsize = mpatch.patchedsize
 textdiff = bdiff.bdiff
-
-def splitnewlines(text):
-    '''like str.splitlines, but only split on newlines.'''
-    lines = [l + '\n' for l in text.split('\n')]
-    if lines:
-        if lines[-1] == '\n':
-            lines.pop()
-        else:
-            lines[-1] = lines[-1][:-1]
-    return lines
+splitnewlines = bdiff.splitnewlines
 
 class diffopts(object):
     '''context is the number of context lines
diff --git a/mercurial/cext/bdiff.c b/mercurial/cext/bdiff.c
--- a/mercurial/cext/bdiff.c
+++ b/mercurial/cext/bdiff.c
@@ -182,17 +182,67 @@ 
 	return result ? result : PyErr_NoMemory();
 }
 
+bool sliceintolist(PyObject *list, Py_ssize_t destidx,
+		   const char *source, Py_ssize_t len) {
+	PyObject *sliced = PyBytes_FromStringAndSize(source, len);
+	if (sliced == NULL)
+		return false;
+	PyList_SET_ITEM(list, destidx, sliced);
+	return true;
+}
+
+static PyObject *splitnewlines(PyObject *self, PyObject *args)
+{
+	const char *text;
+	Py_ssize_t nelts = 0, size, i, start = 0;
+	PyObject *result = NULL;
+
+	if (!PyArg_ParseTuple(args, "s#", &text, &size)) {
+		goto abort;
+	}
+	if (!size) {
+		return PyList_New(0);
+	}
+	/* This loops to size-1 because if the last byte is a newline,
+	 * we don't want to perform a split there. */
+	for (i = 0; i < size - 1; ++i) {
+		if (text[i] == '\n') {
+			++nelts;
+		}
+	}
+	if ((result = PyList_New(nelts+1)) == NULL)
+		goto abort;
+	nelts = 0;
+	for (i = 0; i < size - 1; ++i) {
+		if (text[i] == '\n') {
+			if (!sliceintolist(
+				    result, nelts++, text+start, i-start+1))
+				goto abort;
+			start = i+1;
+		}
+	}
+	if (start < size) {
+		if (!sliceintolist(result, nelts++, text+start, size-start))
+			goto abort;
+	}
+	return result;
+abort:
+	Py_XDECREF(result);
+	return NULL;
+}
 
 static char mdiff_doc[] = "Efficient binary diff.";
 
 static PyMethodDef methods[] = {
 	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
 	{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
 	{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
+	{"splitnewlines", splitnewlines, METH_VARARGS,
+	 "like str.splitlines, but only split on newlines\n"},
 	{NULL, NULL}
 };
 
-static const int version = 1;
+static const int version = 2;
 
 #ifdef IS_PY3K
 static struct PyModuleDef bdiff_module = {