Patchwork D2630: xdiff: move hashtable calculation to a separate function

login
register
mail settings
Submitter phabricator
Date March 4, 2018, 2:51 a.m.
Message ID <differential-rev-PHID-DREV-iaeynifviotehcizbgxr-req@phab.mercurial-scm.org>
Download mbox | patch
Permalink /patch/28894/
State Superseded
Headers show

Comments

phabricator - March 4, 2018, 2:51 a.m.
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Before running the main diff algorithm, xdiff will "prepare" the contexts
  for both files. That includes splitting, hashing all the lines, and building
  hash tables for those lines. The hash table building process could be
  expensive. Moving it out so it can be optimized separately.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2630

AFFECTED FILES
  mercurial/thirdparty/xdiff/xprepare.c

CHANGE DETAILS




To: quark, #hg-reviewers
Cc: mercurial-devel

Patch

diff --git a/mercurial/thirdparty/xdiff/xprepare.c b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -157,36 +157,25 @@ 
 
 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
 			   xdlclassifier_t *cf, xdfile_t *xdf) {
-	unsigned int hbits;
-	long nrec, hsize, bsize;
+	long nrec, bsize;
 	unsigned long hav;
 	char const *blk, *cur, *top, *prev;
 	xrecord_t *crec;
 	xrecord_t **recs, **rrecs;
-	xrecord_t **rhash;
 	unsigned long *ha;
 	char *rchg;
 	long *rindex;
 
 	ha = NULL;
 	rindex = NULL;
 	rchg = NULL;
-	rhash = NULL;
 	recs = NULL;
 
 	if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0)
 		goto abort;
 	if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *))))
 		goto abort;
 
-	{
-		hbits = xdl_hashbits((unsigned int) narec);
-		hsize = 1 << hbits;
-		if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *))))
-			goto abort;
-		memset(rhash, 0, hsize * sizeof(xrecord_t *));
-	}
-
 	nrec = 0;
 	if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) {
 		for (top = blk + bsize; cur < top; ) {
@@ -204,9 +193,6 @@ 
 			crec->size = (long) (cur - prev);
 			crec->ha = hav;
 			recs[nrec++] = crec;
-
-			if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 0)
-				goto abort;
 		}
 	}
 
@@ -221,27 +207,60 @@ 
 
 	xdf->nrec = nrec;
 	xdf->recs = recs;
-	xdf->hbits = hbits;
-	xdf->rhash = rhash;
 	xdf->rchg = rchg + 1;
 	xdf->rindex = rindex;
 	xdf->nreff = 0;
 	xdf->ha = ha;
 	xdf->dstart = 0;
 	xdf->dend = nrec - 1;
 
+	/* use xdl_prepare_hashtable to set them */
+	xdf->hbits = 0;
+	xdf->rhash = NULL;
+
 	return 0;
 
 abort:
 	xdl_free(ha);
 	xdl_free(rindex);
 	xdl_free(rchg);
-	xdl_free(rhash);
 	xdl_free(recs);
 	xdl_cha_free(&xdf->rcha);
 	return -1;
 }
 
+/*
+ * Adjust hash values for records (lines) in a file so the hash values become
+ * unique. This makes future calculation faster - they can just compare "ha"
+ * instead of comparing line content.
+ */
+static int xdl_prepare_hashtable(unsigned int pass, mmfile_t *mf,
+		xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) {
+	xrecord_t **rhash = NULL;
+	long nrec = xdf->nrec;
+	unsigned int hbits;
+	long hsize;
+	long i;
+
+	hbits = xdl_hashbits((unsigned int) nrec);
+	hsize = 1 << hbits;
+	if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *))))
+		goto abort;
+	memset(rhash, 0, hsize * sizeof(xrecord_t *));
+
+	for (i = 0; i < nrec; ++i) {
+		if (xdl_classify_record(pass, cf, rhash, hbits, xdf->recs[i]) < 0)
+			goto abort;
+	}
+
+	xdf->hbits = hbits;
+	xdf->rhash = rhash;
+
+	return 0;
+abort:
+	xdl_free(rhash);
+	return -1;
+}
 
 static void xdl_free_ctx(xdfile_t *xdf) {
 
@@ -288,6 +307,19 @@ 
 		return -1;
 	}
 
+	if (xdl_prepare_hashtable(1, mf1, xpp, &cf, &xe->xdf1) < 0) {
+		xdl_free_ctx(&xe->xdf1);
+		xdl_free_ctx(&xe->xdf2);
+		xdl_free_classifier(&cf);
+		return -1;
+	}
+	if (xdl_prepare_hashtable(2, mf2, xpp, &cf, &xe->xdf2) < 0) {
+		xdl_free_ctx(&xe->xdf1);
+		xdl_free_ctx(&xe->xdf2);
+		xdl_free_classifier(&cf);
+		return -1;
+	}
+
 	if (xdl_cleanup_records(&cf, &xe->xdf1, &xe->xdf2) < 0) {
 		xdl_free_ctx(&xe->xdf2);
 		xdl_free_ctx(&xe->xdf1);