From patchwork Sat Oct 12 16:46:14 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: D7068: sidedatacopies: only store an entry if it has values From: phabricator X-Patchwork-Id: 42268 Message-Id: To: Phabricator Cc: mercurial-devel@mercurial-scm.org Date: Sat, 12 Oct 2019 16:46:14 +0000 marmoute created this revision. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY This will make for more compact storage and help us write faster code in simple cases. This change already provides a speed boost for copy tracing. For example, here is the combined time of running copy tracing on 6989 pairs of revisions in the pypy repository: before: 771s after: 631s - 18% This also has a very positive impact on changelog size. For example, here are the numbers for the `00changelog.d` file of pypy. before: 30449712 Bytes after: 24973718 Bytes - 18% This gives an overall quite acceptable overhead for storing copies into the changelog: filelog-only: 23370586 Bytes sidedata: 24973718 Bytes + 7% REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D7068 AFFECTED FILES mercurial/changelog.py mercurial/copies.py tests/test-copies-in-changeset.t CHANGE DETAILS To: marmoute, #hg-reviewers Cc: mercurial-devel diff --git a/tests/test-copies-in-changeset.t b/tests/test-copies-in-changeset.t --- a/tests/test-copies-in-changeset.t +++ b/tests/test-copies-in-changeset.t @@ -77,15 +77,11 @@ 2\x00a (esc) #else $ hg debugsidedata -c -v -- -1 - 4 sidedata entries + 2 sidedata entries entry-0010 size 11 '0\x00a\n1\x00a\n2\x00a' - entry-0011 size 0 - '' entry-0012 size 5 '0\n1\n2' - entry-0013 size 0 - '' #endif $ hg showcopies @@ -119,11 +115,9 @@ #else $ hg debugsidedata -c -v -- -1 - 4 sidedata entries + 3 sidedata entries entry-0010 size 3 '1\x00b' - entry-0011 size 0 - '' entry-0012 size 1 '1' entry-0013 size 1 @@ -168,15 +162,9 @@ #else $ hg debugsidedata -c -v -- -1 - 4 sidedata entries + 
1 sidedata entries entry-0010 size 4 '0\x00b2' - entry-0011 size 0 - '' - entry-0012 size 0 - '' - entry-0013 size 0 - '' #endif $ hg showcopies @@ -231,15 +219,13 @@ #else $ hg debugsidedata -c -v -- -1 - 4 sidedata entries + 3 sidedata entries entry-0010 size 7 '0\x00a\n2\x00f' entry-0011 size 3 '1\x00d' entry-0012 size 5 '0\n1\n2' - entry-0013 size 0 - '' #endif $ hg showcopies @@ -262,15 +248,11 @@ #else $ hg ci -m 'copy a to j' $ hg debugsidedata -c -v -- -1 - 4 sidedata entries + 2 sidedata entries entry-0010 size 3 '0\x00a' - entry-0011 size 0 - '' entry-0012 size 1 '0' - entry-0013 size 0 - '' #endif $ hg debugdata j 0 \x01 (esc) @@ -297,15 +279,11 @@ $ hg ci --amend -m 'copy a to j, v2' saved backup bundle to $TESTTMP/repo/.hg/strip-backup/*-*-amend.hg (glob) $ hg debugsidedata -c -v -- -1 - 4 sidedata entries + 2 sidedata entries entry-0010 size 3 '0\x00a' - entry-0011 size 0 - '' entry-0012 size 1 '0' - entry-0013 size 0 - '' #endif $ hg showcopies --config experimental.copies.read-from=filelog-only a -> j @@ -324,15 +302,6 @@ #else $ hg ci -m 'modify j' $ hg debugsidedata -c -v -- -1 - 4 sidedata entries - entry-0010 size 0 - '' - entry-0011 size 0 - '' - entry-0012 size 0 - '' - entry-0013 size 0 - '' #endif Test writing only to filelog @@ -347,15 +316,11 @@ #else $ hg ci -m 'copy a to k' $ hg debugsidedata -c -v -- -1 - 4 sidedata entries + 2 sidedata entries entry-0010 size 3 '0\x00a' - entry-0011 size 0 - '' entry-0012 size 1 '0' - entry-0013 size 0 - '' #endif $ hg debugdata k 0 @@ -470,16 +435,10 @@ compression: zlib zlib zlib compression-level: default default default $ hg debugsidedata -c -- 0 - 4 sidedata entries - entry-0010 size 0 - entry-0011 size 0 + 1 sidedata entries entry-0012 size 1 - entry-0013 size 0 $ hg debugsidedata -c -- 1 - 4 sidedata entries - entry-0010 size 0 - entry-0011 size 0 - entry-0012 size 0 + 1 sidedata entries entry-0013 size 1 $ hg debugsidedata -m -- 0 $ cat << EOF > .hg/hgrc @@ -522,16 +481,10 @@ compression: zlib 
zlib zlib compression-level: default default default $ hg debugsidedata -c -- 0 - 4 sidedata entries - entry-0010 size 0 - entry-0011 size 0 + 1 sidedata entries entry-0012 size 1 - entry-0013 size 0 $ hg debugsidedata -c -- 1 - 4 sidedata entries - entry-0010 size 0 - entry-0011 size 0 - entry-0012 size 0 + 1 sidedata entries entry-0013 size 1 $ hg debugsidedata -m -- 0 diff --git a/mercurial/copies.py b/mercurial/copies.py --- a/mercurial/copies.py +++ b/mercurial/copies.py @@ -976,10 +976,14 @@ p2copies = encodecopies(sortedfiles, p2copies) filesadded = encodefileindices(sortedfiles, filesadded) filesremoved = encodefileindices(sortedfiles, filesremoved) - sidedata[sidedatamod.SD_P1COPIES] = p1copies - sidedata[sidedatamod.SD_P2COPIES] = p2copies - sidedata[sidedatamod.SD_FILESADDED] = filesadded - sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved + if p1copies: + sidedata[sidedatamod.SD_P1COPIES] = p1copies + if p2copies: + sidedata[sidedatamod.SD_P2COPIES] = p2copies + if filesadded: + sidedata[sidedatamod.SD_FILESADDED] = filesadded + if filesremoved: + sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved return sidedata diff --git a/mercurial/changelog.py b/mercurial/changelog.py --- a/mercurial/changelog.py +++ b/mercurial/changelog.py @@ -693,14 +693,16 @@ extra[b'filesremoved'] = filesremoved elif self._copiesstorage == b'changeset-sidedata': sidedata = {} - if p1copies is not None: + if p1copies: sidedata[sidedatamod.SD_P1COPIES] = p1copies - if p2copies is not None: + if p2copies: sidedata[sidedatamod.SD_P2COPIES] = p2copies - if filesadded is not None: + if filesadded: sidedata[sidedatamod.SD_FILESADDED] = filesadded - if filesremoved is not None: + if filesremoved: sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved + if not sidedata: + sidedata = None if extra: extra = encodeextra(extra)