Patchwork D10772: revlogv2: use a unique filename for data

login
register
mail settings
Submitter phabricator
Date May 27, 2021, 7:06 a.m.
Message ID <differential-rev-PHID-DREV-lgbcnqslilydwgay7jz6-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/49095/
State Superseded
Headers show

Comments

phabricator - May 27, 2021, 7:06 a.m.
marmoute created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Having a unique data filename will allow for unambiguous rewriting of revlog
  content, which is useful to clarify the handling of some operations like
  censoring or stripping.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10772

AFFECTED FILES
  mercurial/revlog.py
  mercurial/revlogutils/docket.py
  mercurial/store.py
  tests/test-revlog-v2.t

CHANGE DETAILS




To: marmoute, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/tests/test-revlog-v2.t b/tests/test-revlog-v2.t
--- a/tests/test-revlog-v2.t
+++ b/tests/test-revlog-v2.t
@@ -58,6 +58,7 @@ 
   date:        Thu Jan 01 00:00:00 1970 +0000
   summary:     initial
   
+
 Header written as expected
 
   $ f --hexdump --bytes 4 .hg/store/00changelog.i
@@ -77,9 +78,9 @@ 
 - a data file
 
   $ ls .hg/store/00changelog* .hg/store/00manifest*
-  .hg/store/00changelog-b870a51b.idx
-  .hg/store/00changelog.d
+  .hg/store/00changelog-6b8ab34b.dat
+  .hg/store/00changelog-88698448.idx
   .hg/store/00changelog.i
-  .hg/store/00manifest-88698448.idx
-  .hg/store/00manifest.d
+  .hg/store/00manifest-1335303a.dat
+  .hg/store/00manifest-b875dfc5.idx
   .hg/store/00manifest.i
diff --git a/mercurial/store.py b/mercurial/store.py
--- a/mercurial/store.py
+++ b/mercurial/store.py
@@ -389,7 +389,14 @@ 
 ]
 
 REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
-REVLOG_FILES_OTHER_EXT = (b'.idx', b'.d', b'.n', b'.nd', b'd.tmpcensored')
+REVLOG_FILES_OTHER_EXT = (
+    b'.idx',
+    b'.d',
+    b'.dat',
+    b'.n',
+    b'.nd',
+    b'd.tmpcensored',
+)
 # files that are "volatile" and might change between listing and streaming
 #
 # note: the ".nd" file are nodemap data and won't "change" but they might be
@@ -414,6 +421,7 @@ 
         if f.endswith(REVLOG_FILES_VOLATILE_EXT):
             t |= FILEFLAGS_VOLATILE
         return t
+    return None
 
 
 # the file is part of changelog data
@@ -753,6 +761,7 @@ 
             ef = self.encode(f)
             try:
                 t = revlog_type(f)
+                assert t is not None, f
                 t |= FILEFLAGS_FILELOG
                 yield t, f, ef, self.getsize(ef)
             except OSError as err:
diff --git a/mercurial/revlogutils/docket.py b/mercurial/revlogutils/docket.py
--- a/mercurial/revlogutils/docket.py
+++ b/mercurial/revlogutils/docket.py
@@ -89,12 +89,13 @@ 
 #          |   This is mandatory as docket must be compatible with the previous
 #          |   revlog index header.
 # * 1 bytes: size of index uuid
+# * 1 bytes: size of data uuid
 # * 8 bytes: size of index-data
 # * 8 bytes: pending size of index-data
 # * 8 bytes: size of data
 # * 8 bytes: pending size of data
 # * 1 bytes: default compression header
-S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BLLLLc')
+S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BBLLLLc')
 
 
 class RevlogDocket(object):
@@ -106,6 +107,7 @@ 
         use_pending=False,
         version_header=None,
         index_uuid=None,
+        data_uuid=None,
         index_end=0,
         pending_index_end=0,
         data_end=0,
@@ -119,6 +121,7 @@ 
         self._path = revlog._docket_file
         self._opener = revlog.opener
         self._index_uuid = index_uuid
+        self._data_uuid = data_uuid
         # thes asserts should be True as long as we have a single index filename
         assert index_end <= pending_index_end
         assert data_end <= pending_data_end
@@ -141,6 +144,13 @@ 
             self._index_uuid = make_uid()
         return b"%s-%s.idx" % (self._radix, self._index_uuid)
 
+    def data_filepath(self):
+        """file path to the current data file associated to this docket"""
+        # very simplistic version at first
+        if self._data_uuid is None:
+            self._data_uuid = make_uid()
+        return b"%s-%s.dat" % (self._radix, self._data_uuid)
+
     @property
     def index_end(self):
         return self._index_end
@@ -195,6 +205,7 @@ 
         data = (
             self._version_header,
             len(self._index_uuid),
+            len(self._data_uuid),
             official_index_end,
             self._index_end,
             official_data_end,
@@ -204,6 +215,7 @@ 
         s = []
         s.append(S_HEADER.pack(*data))
         s.append(self._index_uuid)
+        s.append(self._data_uuid)
         return b''.join(s)
 
 
@@ -230,16 +242,20 @@ 
     index_uuid_size = header[1]
     index_uuid = data[offset : offset + index_uuid_size]
     offset += index_uuid_size
-    index_size = header[2]
-    pending_index_size = header[3]
-    data_size = header[4]
-    pending_data_size = header[5]
-    default_compression_header = header[6]
+    data_uuid_size = header[2]
+    data_uuid = data[offset : offset + data_uuid_size]
+    offset += data_uuid_size
+    index_size = header[3]
+    pending_index_size = header[4]
+    data_size = header[5]
+    pending_data_size = header[6]
+    default_compression_header = header[7]
     docket = RevlogDocket(
         revlog,
         use_pending=use_pending,
         version_header=version_header,
         index_uuid=index_uuid,
+        data_uuid=data_uuid,
         index_end=index_size,
         pending_index_end=pending_index_size,
         data_end=data_size,
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -628,7 +628,9 @@ 
             # main docket, so disable it for now.
             self._nodemap_file = None
 
-        if self.postfix is None:
+        if self._docket is not None:
+            self._datafile = self._docket.data_filepath()
+        elif self.postfix is None:
             self._datafile = b'%s.d' % self.radix
         else:
             self._datafile = b'%s.d.%s' % (self.radix, self.postfix)