Patchwork D2686: xdiff: add a preprocessing step that trims files

login
register
mail settings
Submitter phabricator
Date March 5, 2018, 1:09 a.m.
Message ID <426a47e90a23063cec125459f1a86bb8@localhost.localdomain>
Download mbox | patch
Permalink /patch/29037/
State Not Applicable
Headers show

Comments

phabricator - March 5, 2018, 1:09 a.m.
quark updated this revision to Diff 6645.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2686?vs=6644&id=6645

REVISION DETAIL
  https://phab.mercurial-scm.org/D2686

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiffi.c
  mercurial/thirdparty/xdiff/xemit.c
  mercurial/thirdparty/xdiff/xprepare.c
  mercurial/thirdparty/xdiff/xprepare.h
  mercurial/thirdparty/xdiff/xtypes.h

CHANGE DETAILS




To: quark, #hg-reviewers
Cc: mercurial-devel

Patch

diff --git a/mercurial/thirdparty/xdiff/xtypes.h b/mercurial/thirdparty/xdiff/xtypes.h
--- a/mercurial/thirdparty/xdiff/xtypes.h
+++ b/mercurial/thirdparty/xdiff/xtypes.h
@@ -60,6 +60,7 @@ 
 
 typedef struct s_xdfenv {
 	xdfile_t xdf1, xdf2;
+	long prefix_lines, suffix_lines; /* lines trimmed by xdl_trim_files() */
 } xdfenv_t;
 
 
diff --git a/mercurial/thirdparty/xdiff/xprepare.h b/mercurial/thirdparty/xdiff/xprepare.h
--- a/mercurial/thirdparty/xdiff/xprepare.h
+++ b/mercurial/thirdparty/xdiff/xprepare.h
@@ -26,7 +26,7 @@ 
 
 
 int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
-		    xdfenv_t *xe);
+		    xdfenv_t *xe, xdemitconf_t const *xecfg);
 void xdl_free_env(xdfenv_t *xe);
 
 
diff --git a/mercurial/thirdparty/xdiff/xprepare.c b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -61,6 +61,8 @@ 
 static void xdl_free_ctx(xdfile_t *xdf);
 static int xdl_clean_mmatch(char const *dis, long i, long s, long e);
 static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2);
+static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, long reserved,
+		xdfenv_t* xe, mmfile_t *out_mf1, mmfile_t *out_mf2);
 static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2);
 static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2);
 
@@ -156,6 +158,105 @@ 
 }
 
 
+/*
+ * Trim the common prefix and suffix from two files before diffing. The
+ * trimmed views (pointing into the buffers of mf1/mf2) go to out_mf1/out_mf2
+ * and the trimmed line counts to xe->prefix_lines and xe->suffix_lines.
+ *
+ * Note: need to preserve some lines for shifting to work. For example,
+ *
+ *     a.py    | common | b.py    | common | diff
+ *             | p | s  |         | p | s  |
+ *     -------------------------------------------
+ *     try:    | Y      | try:    | Y      |
+ *       1     | Y      |   1     | Y      |
+ *     except: | Y  y   | except: | Y      |
+ *       pass  | Y  y   |   pass  | Y      |
+ *     try:    | Y  y   | try:    | Y      | +
+ *       3     |    Y   |   2     |        | +
+ *     except: |    Y   | except: |     y  | +
+ *       pass  |    Y   |   pass  |     y  | +
+ *             |        | try:    |     y  |
+ *             |        |   3     |     Y  |
+ *             |        | except: |     Y  |
+ *             |        |   pass  |     Y  |
+ *
+ * To make shifting work, common suffix is handled so it cannot overlap with
+ * common prefix (in the above case, lines marked with "y" will not count as
+ * common suffix). Then retain "reserved" lines leaving room for shifting.
+ *
+ * Find common prefix first, since the diff algorithm moves hunks towards the
+ * end. For example,
+ *
+ *     #!python
+ *     open('a','w').write('x\n'* 1000)
+ *     open('b','w').write('x\n'* 1001)
+ *
+ * Diffing the two files in either direction will print a hunk at the end.
+ */
+static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, long reserved,
+		xdfenv_t *xe, mmfile_t *out_mf1, mmfile_t *out_mf2) {
+	mmfile_t msmall, mlarge;
+	long plines = 0, pbytes = 0, slines = 0, sbytes = 0, i;
+	const char *pp1, *pp2, *ps1, *ps2;
+
+	/* reserved must be >= 0 for the line boundary adjustment to work */
+	if (reserved < 0)
+		reserved = 0;
+
+	msmall = (mf1->size < mf2->size) ? *mf1 : *mf2;
+	mlarge = (mf1->size < mf2->size) ? *mf2 : *mf1;
+
+	pp1 = msmall.ptr, pp2 = mlarge.ptr;
+	for (i = 0; i < msmall.size && *pp1 == *pp2; ++i) {
+		plines += (*pp1 == '\n');
+		pp1++, pp2++;
+	}
+
+	/* Start at pp1 when msmall is empty: "ptr + size - 1" would be UB */
+	ps1 = msmall.size > 0 ? msmall.ptr + msmall.size - 1 : pp1;
+	ps2 = mlarge.size > 0 ? mlarge.ptr + mlarge.size - 1 : pp2;
+	while (ps1 > pp1 && *ps1 == *ps2) {
+		slines += (*ps1 == '\n');
+		ps1--, ps2--;
+	}
+
+	/* Retract common prefix and suffix boundaries for reserved lines */
+	if (plines <= reserved + 1) {
+		plines = 0;
+	} else {
+		for (i = 0; i <= reserved;) {
+			pp1--;
+			i += (*pp1 == '\n');
+		}
+		/* The new mmfile starts at the next char just after '\n' */
+		pbytes = pp1 - msmall.ptr + 1;
+		plines -= reserved;
+	}
+
+	if (slines <= reserved + 1) {
+		slines = 0;
+	} else {
+		for (i = 0; i <= reserved;) {
+			ps1++;
+			i += (*ps1 == '\n');
+		}
+		/* The new mmfile includes this '\n' */
+		sbytes = msmall.ptr + msmall.size - ps1 - 1;
+		slines -= reserved;
+		if (msmall.ptr[msmall.size - 1] == '\n')
+			slines -= 1;
+	}
+
+	xe->prefix_lines = plines;
+	xe->suffix_lines = slines;
+	out_mf1->ptr = mf1->ptr + pbytes;
+	out_mf1->size = mf1->size - pbytes - sbytes;
+	out_mf2->ptr = mf2->ptr + pbytes;
+	out_mf2->size = mf2->size - pbytes - sbytes;
+}
+
+
 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
 			   xdlclassifier_t *cf, xdfile_t *xdf) {
 	unsigned int hbits;
@@ -254,10 +355,14 @@ 
 	xdl_cha_free(&xdf->rcha);
 }
 
+/* Minimum number of lines reserved during file trimming, to leave room for
+ * shifting. */
+#define MIN_RESERVED_LINES 100
 
 int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
-		    xdfenv_t *xe) {
-	long enl1, enl2, sample;
+		    xdfenv_t *xe, xdemitconf_t const *xecfg) {
+	mmfile_t tmf1, tmf2;
+	long enl1, enl2, sample, reserved_lines = MIN_RESERVED_LINES;
 	xdlclassifier_t cf;
 
 	memset(&cf, 0, sizeof(cf));
@@ -270,12 +375,16 @@ 
 	if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
 		return -1;
 
-	if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
+	if (xecfg)
+		reserved_lines += xecfg->ctxlen;
+	xdl_trim_files(mf1, mf2, reserved_lines, xe, &tmf1, &tmf2);
+
+	if (xdl_prepare_ctx(1, &tmf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
 
 		xdl_free_classifier(&cf);
 		return -1;
 	}
-	if (xdl_prepare_ctx(2, mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
+	if (xdl_prepare_ctx(2, &tmf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
 
 		xdl_free_ctx(&xe->xdf1);
 		xdl_free_classifier(&cf);
diff --git a/mercurial/thirdparty/xdiff/xemit.c b/mercurial/thirdparty/xdiff/xemit.c
--- a/mercurial/thirdparty/xdiff/xemit.c
+++ b/mercurial/thirdparty/xdiff/xemit.c
@@ -169,6 +169,7 @@ 
 	long s1, s2, e1, e2, lctx;
 	xdchange_t *xch, *xche;
 	long funclineprev = -1;
+	long p = xe->prefix_lines;
 	struct func_line func_line = { 0 };
 
 	for (xch = xscr; xch; xch = xche->next) {
@@ -261,7 +262,7 @@ 
 				      s1 - 1, funclineprev);
 			funclineprev = s1 - 1;
 		}
-		if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
+		if (xdl_emit_hunk_hdr(s1 + 1 + p, e1 - s1, s2 + 1 + p, e2 - s2,
 				      func_line.buf, func_line.len, ecb) < 0)
 			return -1;
 
diff --git a/mercurial/thirdparty/xdiff/xdiffi.c b/mercurial/thirdparty/xdiff/xdiffi.c
--- a/mercurial/thirdparty/xdiff/xdiffi.c
+++ b/mercurial/thirdparty/xdiff/xdiffi.c
@@ -325,14 +325,14 @@ 
 }
 
 
-int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
-		xdfenv_t *xe) {
+int xdl_do_diff2(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+		 xdfenv_t *xe, xdemitconf_t const *xecfg) {
 	long ndiags;
 	long *kvd, *kvdf, *kvdb;
 	xdalgoenv_t xenv;
 	diffdata_t dd1, dd2;
 
-	if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {
+	if (xdl_prepare_env(mf1, mf2, xpp, xe, xecfg) < 0) {
 
 		return -1;
 	}
@@ -381,6 +381,12 @@ 
 }
 
 
+int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+		xdfenv_t *xe) {
+	return xdl_do_diff2(mf1, mf2, xpp, xe, NULL); /* NULL: no emit config */
+}
+
+
 static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
 	xdchange_t *xch;
 
@@ -1010,30 +1016,38 @@ 
 static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 			      xdemitconf_t const *xecfg)
 {
+	long p = xe->prefix_lines, s = xe->suffix_lines; /* trimmed line counts */
 	xdchange_t *xch, *xche;
 	if ((xecfg->flags & XDL_EMIT_BDIFFHUNK) != 0) {
 		long i1 = 0, i2 = 0, n1 = xe->xdf1.nrec, n2 = xe->xdf2.nrec;
 		for (xch = xscr; xch; xch = xche->next) {
 			xche = xdl_get_hunk(&xch, xecfg);
 			if (!xch)
 				break;
+			if (xch != xche)
+				xdl_bug("xch != xche");
+			xch->i1 += p; /* index in the untrimmed file */
+			xch->i2 += p;
 			if (xch->i1 > i1 || xch->i2 > i2) {
-				if (xecfg->hunk_func(i1, xch->i1, i2, xch->i2, ecb->priv) < 0)
+				if (xecfg->hunk_func(i1, xch->i1, i2, xch->i2,
+						     ecb->priv) < 0)
 					return -1;
 			}
-			i1 = xche->i1 + xche->chg1;
-			i2 = xche->i2 + xche->chg2;
+			i1 = xch->i1 + xch->chg1;
+			i2 = xch->i2 + xch->chg2;
 		}
-		if (xecfg->hunk_func(i1, n1, i2, n2, ecb->priv) < 0)
+		if (xecfg->hunk_func(i1, n1 + p + s, i2, n2 + p + s,
+				     ecb->priv) < 0)
 			return -1;
 	} else {
 		for (xch = xscr; xch; xch = xche->next) {
 			xche = xdl_get_hunk(&xch, xecfg);
 			if (!xch)
 				break;
-			if (xecfg->hunk_func(
-					xch->i1, xche->i1 + xche->chg1 - xch->i1,
-					xch->i2, xche->i2 + xche->chg2 - xch->i2,
+			/* p shifts the start offsets only; the counts are unchanged */
+			if (xecfg->hunk_func(xch->i1 + p,
+					xche->i1 + xche->chg1 - xch->i1,
+					xch->i2 + p, xche->i2 + xche->chg2 - xch->i2,
 					ecb->priv) < 0)
 				return -1;
 		}
@@ -1068,7 +1082,7 @@ 
 	xdfenv_t xe;
 	emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff;
 
-	if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) {
+	if (xdl_do_diff2(mf1, mf2, xpp, &xe, xecfg) < 0) {
 
 		return -1;
 	}