Patchwork D11262: repair: improve performance of detection of revisions affected by issue6528

login
register
mail settings
Submitter phabricator
Date Aug. 6, 2021, 10:12 a.m.
Message ID <differential-rev-PHID-DREV-szxttatogblmwo7ambfr-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/49576/
State Superseded
Headers show

Comments

phabricator - Aug. 6, 2021, 10:12 a.m.
Alphare created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Explanations inside the patch. I've tested this on Mozilla-Central and it's
  5 times faster than the naive approach on my laptop.

REPOSITORY
  rHG Mercurial

BRANCH
  stable

REVISION DETAIL
  https://phab.mercurial-scm.org/D11262

AFFECTED FILES
  mercurial/revlogutils/rewrite.py

CHANGE DETAILS




To: Alphare, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/mercurial/revlogutils/rewrite.py b/mercurial/revlogutils/rewrite.py
--- a/mercurial/revlogutils/rewrite.py
+++ b/mercurial/revlogutils/rewrite.py
@@ -10,6 +10,7 @@ 
 import binascii
 import contextlib
 import os
+import struct
 
 from ..node import (
     nullrev,
@@ -561,7 +562,7 @@ 
             util.tryunlink(new_file_path)
 
 
-def _is_revision_affected(ui, fl, filerev, path):
+def _is_revision_affected(ui, fl, filerev):
     """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
     special meaning compared to the reverse in the context of filelog-based
     copytracing. issue6528 exists because new code assumed that parent ordering
@@ -581,6 +582,53 @@ 
     return False
 
 
+def _is_revision_affected_fast(repo, fl, filerev, filename, affected_chain):
+    """Optimization fast-path for `_is_revision_affected`.
+
+    `affected_chain` is a list containing a single boolean (allowing us to
+    mutate it from here), which is `True` if any previous revision in its chain
+    are affected, saving computation of the full text, instead looking at the
+    current delta. If the current delta does *not* touch the metadata parts we
+    can return the chain result, otherwise fall back to the slow method of
+    checking, as we do for snapshots.
+
+    This only works if the revisions are looked at in order."""
+    rl = fl._revlog
+
+    if rl.issnapshot(filerev):
+        is_affected = _is_revision_affected(repo.ui, fl, filerev)
+        affected_chain[0] = is_affected
+        return is_affected
+
+    if rl.iscensored(filerev):
+        # revlogv1 cannot censor snaphots for some reason, only check for deltas
+        # Censored revisions don't contain metadata, so they cannot be affected
+        return False
+
+    chunk = rl._chunk(filerev)
+    if not len(chunk):
+        # No diff for this revision
+        return affected_chain[0]
+
+    chunk_header_length = 12
+    if len(chunk) < chunk_header_length:
+        raise error.Abort(_(b"patch cannot be decoded"))
+
+    start, _end, _length = struct.unpack(b">lll", chunk[:chunk_header_length])
+
+    # This is the expected size of copy metadata, if present
+    expected_metadata = b"\1\ncopy: %s\ncopyrev: %s\1\n"
+    expected_metadata_len = len(expected_metadata % (filename, repo.nullid))
+    if start < expected_metadata_len:
+        # This delta changes (removes, etc.) *something* at an index that could
+        # affect copy metadata, check the old way.
+        is_affected = _is_revision_affected(repo.ui, fl, filerev)
+        affected_chain[0] = is_affected
+        return is_affected
+
+    return affected_chain[0]
+
+
 def _from_report(ui, repo, context, from_report, dry_run):
     """
     Fix the revisions given in the `from_report` file, but still checks if the
@@ -603,7 +651,7 @@ 
             excluded = set()
 
             for filerev in to_fix:
-                if _is_revision_affected(ui, fl, filerev, filename):
+                if _is_revision_affected(ui, fl, filerev):
                     msg = b"found affected revision %d for filelog '%s'\n"
                     ui.warn(msg % (filerev, filename))
                 else:
@@ -663,11 +711,11 @@ 
 
             # Set of filerevs (or hex filenodes if `to_report`) that need fixing
             to_fix = set()
+            affected_chain = [False]
             for filerev in fl.revs():
-                # TODO speed up by looking at the start of the delta
-                # If it hasn't changed, it's not worth looking at the other revs
-                # in the same chain
-                affected = _is_revision_affected(ui, fl, filerev, path)
+                affected = _is_revision_affected_fast(
+                    repo, fl, filerev, filename, affected_chain
+                )
                 if affected:
                     msg = b"found affected revision %d for filelog '%s'\n"
                     ui.warn(msg % (filerev, path))