Patchwork [2,of,7,resend] pathencode: add a SHA-1 hash function

login
register
mail settings
Submitter Bryan O'Sullivan
Date Dec. 12, 2012, 9:10 p.m.
Message ID <1124cfd02d08caffaa91.1355346657@australite.local>
Download mbox | patch
Permalink /patch/71/
State Accepted
Commit 3aa9b2136593b161262d1055d479c582f55e01c3
Delegated to: Matt Mackall
Headers show

Comments

Bryan O'Sullivan - Dec. 12, 2012, 9:10 p.m.
# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1355346574 28800
# Node ID 1124cfd02d08caffaa9127aae4404c67f90dadcb
# Parent  e4631ea16083476a48cc350de24c540d7fe0fad5
pathencode: add a SHA-1 hash function

This will be used by an upcoming patch.

This calls out to the Python hash implementation.

An earlier version of this function implemented SHA-1 directly, but
the amount of extra code didn't seem like a good tradeoff compared
to the small big-picture increase in performance (long paths are
uncommon).

Patch

diff --git a/mercurial/pathencode.c b/mercurial/pathencode.c
--- a/mercurial/pathencode.c
+++ b/mercurial/pathencode.c
@@ -524,6 +524,83 @@  PyObject *lowerencode(PyObject *self, Py
 }
 
 /*
+ * Compute the SHA-1 digest of the len bytes at str and store the
+ * 20 raw digest bytes in hash.
+ *
+ * The digest is obtained by calling the "sha1" attribute of the
+ * "mercurial.util" module and then the "digest" method of the object
+ * it returns.  The sha1 callable is looked up on the first call and
+ * cached in a function-local static for later calls.
+ *
+ * Returns 0 on success.  Returns -1 with a Python exception set on
+ * failure, in which case hash is left unmodified.
+ *
+ * Avoiding a trip through Python would improve performance by 50%,
+ * but we don't encounter enough long names to be worth the code.
+ */
+static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
+{
+	static PyObject *shafunc;
+	PyObject *shaobj, *hashobj;
+
+	if (shafunc == NULL) {
+		PyObject *util, *name = PyString_FromString("mercurial.util");
+
+		if (name == NULL)
+			return -1;
+
+		util = PyImport_Import(name);
+		Py_DECREF(name);
+
+		if (util == NULL) {
+			PyErr_SetString(PyExc_ImportError, "mercurial.util");
+			return -1;
+		}
+		shafunc = PyObject_GetAttrString(util, "sha1");
+		Py_DECREF(util);
+
+		if (shafunc == NULL) {
+			PyErr_SetString(PyExc_AttributeError,
+					"module 'mercurial.util' has no "
+					"attribute 'sha1'");
+			return -1;
+		}
+	}
+
+	/*
+	 * NOTE(review): "s#" is passed a Py_ssize_t length here, which
+	 * presumably relies on PY_SSIZE_T_CLEAN being defined before
+	 * Python.h is included -- confirm.
+	 */
+	shaobj = PyObject_CallFunction(shafunc, "s#", str, len);
+
+	if (shaobj == NULL)
+		return -1;
+
+	hashobj = PyObject_CallMethod(shaobj, "digest", "");
+	Py_DECREF(shaobj);
+
+	/*
+	 * A NULL result means the digest call failed with an exception
+	 * already set.  Return before the type check and Py_DECREF
+	 * below, which must not be given a NULL object.
+	 */
+	if (hashobj == NULL)
+		return -1;
+
+	if (!PyString_Check(hashobj) || PyString_GET_SIZE(hashobj) != 20) {
+		PyErr_SetString(PyExc_TypeError,
+				"result of digest is not a 20-byte hash");
+		Py_DECREF(hashobj);
+		return -1;
+	}
+
+	memcpy(hash, PyString_AS_STRING(hashobj), 20);
+	Py_DECREF(hashobj);
+	return 0;
+}
+
+/*
  * We currently implement only basic encoding.
  *
  * If a name is too long to encode due to Windows path name limits,