Patchwork [V2] dirs: remove mutable string optimization at all

login
register
mail settings
Submitter Yuya Nishihara
Date Oct. 14, 2019, 3:54 a.m.
Message ID <dd9f93e8af83be1bfd70.1571025244@mimosa>
Download mbox | patch
Permalink /patch/42310/
State Accepted
Headers show

Comments

Yuya Nishihara - Oct. 14, 2019, 3:54 a.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1570964769 -32400
#      Sun Oct 13 20:06:09 2019 +0900
# Node ID dd9f93e8af83be1bfd7082026d28f774917f0593
# Parent  feaea7fe888383bcb1d28e69df325e41ef68ece9
dirs: remove mutable string optimization at all

As far as I can see, the optimization trick has been dead since 42e89b87ca79
"dirs: speed up by storing number of direct children per dir". After
42e89b87ca79, the key variable is cleared to NULL at each iteration.
Augie Fackler - Oct. 15, 2019, 1:16 p.m.
queued this, many thanks

> On Oct 13, 2019, at 23:54, Yuya Nishihara <yuya@tcha.org> wrote:
> 
> # HG changeset patch
> # User Yuya Nishihara <yuya@tcha.org>
> # Date 1570964769 -32400
> #      Sun Oct 13 20:06:09 2019 +0900
> # Node ID dd9f93e8af83be1bfd7082026d28f774917f0593
> # Parent  feaea7fe888383bcb1d28e69df325e41ef68ece9
> dirs: remove mutable string optimization at all
> 
> As far as I can see, the optimization trick has been dead since 42e89b87ca79
> "dirs: speed up by storing number of direct children per dir". After
> 42e89b87ca79, the key variable is cleared to NULL at each iteration.
> 
> diff --git a/mercurial/cext/dirs.c b/mercurial/cext/dirs.c
> --- a/mercurial/cext/dirs.c
> +++ b/mercurial/cext/dirs.c
> @@ -26,9 +26,6 @@
>  *
>  * We modify Python integers for refcounting, but those integers are
>  * never visible to Python code.
> - *
> - * We mutate strings in-place, but leave them immutable once they can
> - * be seen by Python code.
>  */
> typedef struct {
> 	PyObject_HEAD
> @@ -63,46 +60,18 @@ static int _addpath(PyObject *dirs, PyOb
> 	* "protocol" such as mutating immutable objects. But since we only
> 	* mutate objects created in this function or in other well-defined
> 	* locations, the references are known so these violations should go
> -	* unnoticed. The code for adjusting the length of a PyBytesObject is
> -	* essentially a minimal version of _PyBytes_Resize. */
> +	* unnoticed. */
> 	while ((pos = _finddir(cpath, pos - 1)) != -1) {
> 		PyObject *val;
> 
> -		if (pos < 2) {
> -			key = PyBytes_FromStringAndSize(cpath, pos);
> -			if (key == NULL)
> -				goto bail;
> -		} else {
> -			/* It's likely that every prefix already has an entry
> -			   in our dict. Try to avoid allocating and
> -			   deallocating a string for each prefix we check. */
> -			if (key != NULL)
> -				((PyBytesObject *)key)->ob_shash = -1;
> -			else {
> -				/* We know pos >= 2, so we won't get a small
> -				 * shared string. */
> -				key = PyBytes_FromStringAndSize(cpath, pos);
> -				if (key == NULL)
> -					goto bail;
> -			}
> -			/* Py_SIZE(o) refers to the ob_size member of
> -			 * the struct. Yes, assigning to what looks
> -			 * like a function seems wrong. */
> -			Py_SIZE(key) = pos;
> -			((PyBytesObject *)key)->ob_sval[pos] = '\0';
> -		}
> +		key = PyBytes_FromStringAndSize(cpath, pos);
> +		if (key == NULL)
> +			goto bail;
> 
> 		val = PyDict_GetItem(dirs, key);
> 		if (val != NULL) {
> 			PYLONG_VALUE(val) += 1;
> -			if (pos < 2) {
> -				/* This was a short string, so we
> -				 * probably got a small shared string
> -				 * we can't mutate on the next loop
> -				 * iteration. Clear it.
> -				 */
> -				Py_CLEAR(key);
> -			}
> +			Py_CLEAR(key);
> 			break;
> 		}
> 
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Patch

diff --git a/mercurial/cext/dirs.c b/mercurial/cext/dirs.c
--- a/mercurial/cext/dirs.c
+++ b/mercurial/cext/dirs.c
@@ -26,9 +26,6 @@ 
  *
  * We modify Python integers for refcounting, but those integers are
  * never visible to Python code.
- *
- * We mutate strings in-place, but leave them immutable once they can
- * be seen by Python code.
  */
 typedef struct {
 	PyObject_HEAD
@@ -63,46 +60,18 @@  static int _addpath(PyObject *dirs, PyOb
 	* "protocol" such as mutating immutable objects. But since we only
 	* mutate objects created in this function or in other well-defined
 	* locations, the references are known so these violations should go
-	* unnoticed. The code for adjusting the length of a PyBytesObject is
-	* essentially a minimal version of _PyBytes_Resize. */
+	* unnoticed. */
 	while ((pos = _finddir(cpath, pos - 1)) != -1) {
 		PyObject *val;
 
-		if (pos < 2) {
-			key = PyBytes_FromStringAndSize(cpath, pos);
-			if (key == NULL)
-				goto bail;
-		} else {
-			/* It's likely that every prefix already has an entry
-			   in our dict. Try to avoid allocating and
-			   deallocating a string for each prefix we check. */
-			if (key != NULL)
-				((PyBytesObject *)key)->ob_shash = -1;
-			else {
-				/* We know pos >= 2, so we won't get a small
-				 * shared string. */
-				key = PyBytes_FromStringAndSize(cpath, pos);
-				if (key == NULL)
-					goto bail;
-			}
-			/* Py_SIZE(o) refers to the ob_size member of
-			 * the struct. Yes, assigning to what looks
-			 * like a function seems wrong. */
-			Py_SIZE(key) = pos;
-			((PyBytesObject *)key)->ob_sval[pos] = '\0';
-		}
+		key = PyBytes_FromStringAndSize(cpath, pos);
+		if (key == NULL)
+			goto bail;
 
 		val = PyDict_GetItem(dirs, key);
 		if (val != NULL) {
 			PYLONG_VALUE(val) += 1;
-			if (pos < 2) {
-				/* This was a short string, so we
-				 * probably got a small shared string
-				 * we can't mutate on the next loop
-				 * iteration. Clear it.
-				 */
-				Py_CLEAR(key);
-			}
+			Py_CLEAR(key);
 			break;
 		}