Patchwork D10899: censor: extract the part about writing the other revision in a function

login
register
mail settings
Submitter phabricator
Date June 22, 2021, 11:27 p.m.
Message ID <differential-rev-PHID-DREV-nw4r3xyj5ojvlmk5vobk-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/49212/
State Superseded
Headers show

Comments

phabricator - June 22, 2021, 11:27 p.m.
marmoute created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  The v2_censor function is huge; now that its content has settled a bit, it is
  a good time to split its individual parts into dedicated functions.
  
  We continue with a small function that adds a non-censored revision back to
  the revlog.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10899

AFFECTED FILES
  mercurial/revlogutils/rewrite.py

CHANGE DETAILS




To: marmoute, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/mercurial/revlogutils/rewrite.py b/mercurial/revlogutils/rewrite.py
--- a/mercurial/revlogutils/rewrite.py
+++ b/mercurial/revlogutils/rewrite.py
@@ -270,72 +270,97 @@ 
                 tombstone,
             )
 
-            #### Writing all subsequent revisions
+            # Writing all subsequent revisions
             for rev in range(censor_rev + 1, len(old_index)):
-                entry = old_index[rev]
-                flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
-                old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
+                _rewrite_simple(
+                    rl,
+                    old_index,
+                    open_files,
+                    rev,
+                    rewritten_entries,
+                    tmp_storage,
+                )
+    docket.write(transaction=None, stripping=True)
+
 
-                if rev not in rewritten_entries:
-                    old_data_file.seek(old_data_offset)
-                    new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
-                    new_data = old_data_file.read(new_data_size)
-                    data_delta_base = entry[ENTRY_DELTA_BASE]
-                    d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
-                else:
-                    (
-                        data_delta_base,
-                        start,
-                        end,
-                        d_comp_mode,
-                    ) = rewritten_entries[rev]
-                    new_data_size = end - start
-                    tmp_storage.seek(start)
-                    new_data = tmp_storage.read(new_data_size)
-
-                # It might be faster to group continuous read/write operation,
-                # however, this is censor, an operation that is not focussed
-                # around stellar performance. So I have not written this
-                # optimisation yet.
-                new_data_offset = new_data_file.tell()
-                new_data_file.write(new_data)
+def _rewrite_simple(
+    revlog,
+    old_index,
+    all_files,
+    rev,
+    rewritten_entries,
+    tmp_storage,
+):
+    """append a "normal" revision to the index"""
+    (
+        old_data_file,
+        old_sidedata_file,
+        new_index_file,
+        new_data_file,
+        new_sidedata_file,
+    ) = all_files
+    entry = old_index[rev]
+    flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
+    old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
 
-                sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
-                new_sidedata_offset = new_sidedata_file.tell()
-                if 0 < sidedata_size:
-                    old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
-                    old_sidedata_file.seek(old_sidedata_offset)
-                    new_sidedata = old_sidedata_file.read(sidedata_size)
-                    new_sidedata_file.write(new_sidedata)
+    if rev not in rewritten_entries:
+        old_data_file.seek(old_data_offset)
+        new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
+        new_data = old_data_file.read(new_data_size)
+        data_delta_base = entry[ENTRY_DELTA_BASE]
+        d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
+    else:
+        (
+            data_delta_base,
+            start,
+            end,
+            d_comp_mode,
+        ) = rewritten_entries[rev]
+        new_data_size = end - start
+        tmp_storage.seek(start)
+        new_data = tmp_storage.read(new_data_size)
 
-                data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
-                sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
-                assert data_delta_base <= rev, (data_delta_base, rev)
+    # It might be faster to group continuous read/write operation,
+    # however, this is censor, an operation that is not focussed
+    # around stellar performance. So I have not written this
+    # optimisation yet.
+    new_data_offset = new_data_file.tell()
+    new_data_file.write(new_data)
 
-                new_entry = revlogutils.entry(
-                    flags=flags,
-                    data_offset=new_data_offset,
-                    data_compressed_length=new_data_size,
-                    data_uncompressed_length=data_uncompressed_length,
-                    data_delta_base=data_delta_base,
-                    link_rev=entry[ENTRY_LINK_REV],
-                    parent_rev_1=entry[ENTRY_PARENT_1],
-                    parent_rev_2=entry[ENTRY_PARENT_2],
-                    node_id=entry[ENTRY_NODE_ID],
-                    sidedata_offset=new_sidedata_offset,
-                    sidedata_compressed_length=sidedata_size,
-                    data_compression_mode=d_comp_mode,
-                    sidedata_compression_mode=sd_com_mode,
-                )
-                rl.index.append(new_entry)
-                entry_bin = rl.index.entry_binary(rev)
-                new_index_file.write(entry_bin)
+    sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
+    new_sidedata_offset = new_sidedata_file.tell()
+    if 0 < sidedata_size:
+        old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
+        old_sidedata_file.seek(old_sidedata_offset)
+        new_sidedata = old_sidedata_file.read(sidedata_size)
+        new_sidedata_file.write(new_sidedata)
+
+    data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
+    sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
+    assert data_delta_base <= rev, (data_delta_base, rev)
 
-                docket.index_end = new_index_file.tell()
-                docket.data_end = new_data_file.tell()
-                docket.sidedata_end = new_sidedata_file.tell()
+    new_entry = revlogutils.entry(
+        flags=flags,
+        data_offset=new_data_offset,
+        data_compressed_length=new_data_size,
+        data_uncompressed_length=data_uncompressed_length,
+        data_delta_base=data_delta_base,
+        link_rev=entry[ENTRY_LINK_REV],
+        parent_rev_1=entry[ENTRY_PARENT_1],
+        parent_rev_2=entry[ENTRY_PARENT_2],
+        node_id=entry[ENTRY_NODE_ID],
+        sidedata_offset=new_sidedata_offset,
+        sidedata_compressed_length=sidedata_size,
+        data_compression_mode=d_comp_mode,
+        sidedata_compression_mode=sd_com_mode,
+    )
+    revlog.index.append(new_entry)
+    entry_bin = revlog.index.entry_binary(rev)
+    new_index_file.write(entry_bin)
 
-    docket.write(transaction=None, stripping=True)
+    revlog._docket.index_end = new_index_file.tell()
+    revlog._docket.data_end = new_data_file.tell()
+    revlog._docket.sidedata_end = new_sidedata_file.tell()
 
 
 def _rewrite_censor(