Patchwork [3,of,9] dicthelpers: implement diff and join for dicts in C

login
register
mail settings
Submitter Siddharth Agarwal
Date March 25, 2013, 12:27 a.m.
Message ID <5cd3c7ddca7d3a7c2186.1364171267@sid0x220>
Download mbox | patch
Permalink /patch/1178/
State Rejected
Headers show

Comments

Siddharth Agarwal - March 25, 2013, 12:27 a.m.
# HG changeset patch
# User Siddharth Agarwal <sid0@fb.com>
# Date 1364156187 25200
#      Sun Mar 24 13:16:27 2013 -0700
# Node ID 5cd3c7ddca7d3a7c2186608abf65080fbf849d47
# Parent  7a4e0dbc4e1c576f4469c9d31e2dca16cc35b790
dicthelpers: implement diff and join for dicts in C

Given two dicts, diff returns a dict containing all the keys that are present
in one dict but not the other, or whose values are different between the
dicts. The values are pairs of the values from the dicts, with missing values
being represented as an optional argument, defaulting to None.

Given two dicts, join performs what is known as an outer join in relational
database land: it returns a dict containing all the keys across both dicts.
The values are pairs as above, except they aren't compared to see if they're
the same.

Patch

diff --git a/mercurial/dicthelpers.c b/mercurial/dicthelpers.c
--- a/mercurial/dicthelpers.c
+++ b/mercurial/dicthelpers.c
@@ -8,14 +8,114 @@ 
  */
 
 #include <Python.h>
-#include <stdlib.h>
-#include <string.h>
 
 #include "util.h"
 
 static char dicthelpers_doc[] = "Fast dict operations.";
 
+/*
+ * diff and (outer) join are conceptually very different, but have
+ * fundamentally the same algorithm. The only difference is whether we compare
+ * the values for a given key.
+ */
+static PyObject *_diffjoin(PyObject *self, PyObject *args, int compare)
+{
+	PyObject *dict1, *dict2;
+	PyObject *deflt = Py_None;
+	Py_ssize_t pos;
+	PyObject *k1, *v1, *k2, *v2;
+	PyObject *res = NULL;
+	PyObject *newvalue = NULL;
+
+	if (!PyArg_ParseTuple(args, "O!O!|O",
+			      &PyDict_Type, &dict1,
+			      &PyDict_Type, &dict2,
+			      &deflt))
+		goto fail;
+
+	res = PyDict_New();
+	if (res == NULL)
+		goto fail;
+
+	if (dict1 == dict2 && compare) {
+		/* same dict, so diff is empty */
+		return res;
+	}
+
+	/* elements of dict1 first */
+	pos = 0;
+	while (PyDict_Next(dict1, &pos, &k1, &v1)) {
+		newvalue = NULL;
+		v2 = PyDict_GetItem(dict2, k1);
+		if (v2) {
+			if (compare) {
+				int cmp = PyObject_RichCompareBool(v1, v2,
+								   Py_EQ);
+				if (cmp == -1)
+					goto fail;
+				if (cmp == 1)
+					continue;
+			}
+
+			newvalue = PyTuple_Pack(2, v1, v2);
+			if (newvalue == NULL)
+				goto fail;
+		}
+		else {
+			newvalue = PyTuple_Pack(2, v1, deflt);
+			if (newvalue == NULL)
+				goto fail;
+		}
+		if (PyDict_SetItem(res, k1, newvalue) == -1)
+			goto fail;
+		Py_DECREF(newvalue);
+	}
+
+	if (dict1 == dict2) {
+		return res;
+	}
+
+	/* then dict2 */
+	pos = 0;
+	while (PyDict_Next(dict2, &pos, &k2, &v2)) {
+		newvalue = NULL;
+		v1 = PyDict_GetItem(dict1, k2);
+		if (v1) {
+			/* already visited above */
+			continue;
+		}
+		newvalue = PyTuple_Pack(2, deflt, v2);
+		if (newvalue == NULL)
+			goto fail;
+
+		if (PyDict_SetItem(res, k2, newvalue) == -1)
+			goto fail;
+		Py_DECREF(newvalue);
+	}
+
+	return res;
+fail:
+	Py_XDECREF(newvalue);
+	Py_XDECREF(res);
+	return NULL;
+}
+
+static PyObject *diff(PyObject *self, PyObject *args)
+{
+	return _diffjoin(self, args, 1);
+}
+
+static PyObject *join(PyObject *self, PyObject *args)
+{
+	return _diffjoin(self, args, 0);
+}
+
+
 static PyMethodDef methods[] = {
+	{"diff", diff, METH_VARARGS,
+	 "return missing or different items given two dicts"},
+	{"join", join, METH_VARARGS,
+	 "outer join by key for two dicts"},
 	{NULL, NULL}
 };