Patchwork D10352: revlog: introduce an explicit tracking of what the revlog is about

login
register
mail settings
Submitter phabricator
Date April 9, 2021, 12:32 p.m.
Message ID <differential-rev-PHID-DREV-ongjrzjzbozjfkjt3xvr-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48670/
State Superseded
Headers show

Comments

phabricator - April 9, 2021, 12:32 p.m.
Alphare created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Since the dawn of time, people have been forced to rely to lossy introspection
  of the index filename to determine what the purpose and role of the revlog they
  encounter is. This is hacky, error prone, inflexible, abstraction-leaky,
  <insert-your-own-complaints-here>.
  
  In f63299ee7e4d <https://phab.mercurial-scm.org/rHGf63299ee7e4d9bc817dac2edeae7725766dbd0cc> Raphaël introduced a new attribute to track this information:
  `revlog_kind`. However it is initialized in an odd place and various instances
  end up not having it set. In addition is only tracking some of the information
  we end up having to introspect in various pieces of code.
  
  So we add a new attribute that holds more data and is more strictly enforced.
  This work is done in collaboration with Raphaël.
  
  The `revlog_kind` one will be removed/adapted in the next changeset. We expect
  to be able to clean up various existing piece of code and to simplify coming
  work around the newer revlog format.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10352

AFFECTED FILES
  contrib/dumprevlog
  contrib/perf.py
  contrib/undumprevlog
  mercurial/bundlerepo.py
  mercurial/changelog.py
  mercurial/cmdutil.py
  mercurial/filelog.py
  mercurial/manifest.py
  mercurial/revlog.py
  mercurial/revlogutils/constants.py
  mercurial/unionrepo.py
  tests/test-revlog-raw.py
  tests/test-revlog.t

CHANGE DETAILS




To: Alphare, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/tests/test-revlog.t b/tests/test-revlog.t
--- a/tests/test-revlog.t
+++ b/tests/test-revlog.t
@@ -45,9 +45,10 @@ 
        0       2 99e0332bd498 000000000000 000000000000
        1       3 6674f57a23d8 99e0332bd498 000000000000
 
+  >>> from mercurial.revlogutils.constants import KIND_OTHER
   >>> from mercurial import revlog, vfs
   >>> tvfs = vfs.vfs(b'.')
   >>> tvfs.options = {b'revlogv1': True}
-  >>> rl = revlog.revlog(tvfs, b'a.i')
+  >>> rl = revlog.revlog(tvfs, target=(KIND_OTHER, b'test'), indexfile=b'a.i')
   >>> rl.revision(1)
   mpatchError(*'patch cannot be decoded'*) (glob)
diff --git a/tests/test-revlog-raw.py b/tests/test-revlog-raw.py
--- a/tests/test-revlog-raw.py
+++ b/tests/test-revlog-raw.py
@@ -15,6 +15,7 @@ 
 )
 
 from mercurial.revlogutils import (
+    constants,
     deltas,
     flagutil,
 )
@@ -82,7 +83,9 @@ 
 def newrevlog(name=b'_testrevlog.i', recreate=False):
     if recreate:
         tvfs.tryunlink(name)
-    rlog = revlog.revlog(tvfs, name)
+    rlog = revlog.revlog(
+        tvfs, target=(constants.KIND_OTHER, b'test'), indexfile=name
+    )
     return rlog
 
 
diff --git a/mercurial/unionrepo.py b/mercurial/unionrepo.py
--- a/mercurial/unionrepo.py
+++ b/mercurial/unionrepo.py
@@ -41,7 +41,11 @@ 
         # To differentiate a rev in the second revlog from a rev in the revlog,
         # we check revision against repotiprev.
         opener = vfsmod.readonlyvfs(opener)
-        revlog.revlog.__init__(self, opener, indexfile)
+        target = getattr(revlog2, 'target', None)
+        if target is None:
+            # a revlog wrapper, eg: the manifestlog that is not an actual revlog
+            target = revlog2._revlog.target
+        revlog.revlog.__init__(self, opener, target=target, indexfile=indexfile)
         self.revlog2 = revlog2
 
         n = len(self)
diff --git a/mercurial/revlogutils/constants.py b/mercurial/revlogutils/constants.py
--- a/mercurial/revlogutils/constants.py
+++ b/mercurial/revlogutils/constants.py
@@ -13,6 +13,20 @@ 
 
 from ..interfaces import repository
 
+### Internal utily constants
+
+KIND_CHANGELOG = 1001  # over 256 to not be comparable with a bytes
+KIND_MANIFESTLOG = 1002
+KIND_FILELOG = 1003
+KIND_OTHER = 1004
+
+ALL_KINDS = {
+    KIND_CHANGELOG,
+    KIND_MANIFESTLOG,
+    KIND_FILELOG,
+    KIND_OTHER,
+}
+
 ### main revlog header
 
 INDEX_HEADER = struct.Struct(b">I")
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -39,6 +39,7 @@ 
 from .i18n import _
 from .pycompat import getattr
 from .revlogutils.constants import (
+    ALL_KINDS,
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
     INDEX_ENTRY_V0,
@@ -426,7 +427,8 @@ 
     def __init__(
         self,
         opener,
-        indexfile,
+        target,
+        indexfile=None,
         datafile=None,
         checkambig=False,
         mmaplargeindex=False,
@@ -441,6 +443,12 @@ 
         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
 
+        `target`: a (KIND, ID) tuple that identify the content stored in
+        this revlog. It help the rest of the code to understand what the revlog
+        is about without having to resort to heuristic and index filename
+        analysis. Note: that this must be reliably be set by normal code, but
+        that test, debug, or performance measurement code might not set this to
+        accurate value.
         """
         self.upperboundcomp = upperboundcomp
         self.indexfile = indexfile
@@ -452,6 +460,9 @@ 
             )
 
         self.opener = opener
+        assert target[0] in ALL_KINDS
+        assert len(target) == 2
+        self.target = target
         #  When True, indexfile is opened with checkambig=True at writing, to
         #  avoid file stat ambiguity.
         self._checkambig = checkambig
@@ -2993,7 +3004,13 @@ 
         newdatafile = self.datafile + b'.tmpcensored'
 
         # This is a bit dangerous. We could easily have a mismatch of state.
-        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
+        newrl = revlog(
+            self.opener,
+            target=self.target,
+            indexfile=newindexfile,
+            datafile=newdatafile,
+            censorable=True,
+        )
         newrl.version = self.version
         newrl._generaldelta = self._generaldelta
         newrl._io = self._io
diff --git a/mercurial/manifest.py b/mercurial/manifest.py
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -35,6 +35,9 @@ 
     repository,
     util as interfaceutil,
 )
+from .revlogutils import (
+    constants as revlog_constants,
+)
 
 parsers = policy.importmod('parsers')
 propertycache = util.propertycache
@@ -1606,7 +1609,8 @@ 
 
         self._revlog = revlog.revlog(
             opener,
-            indexfile,
+            target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
+            indexfile=indexfile,
             # only root indexfile is cached
             checkambig=not bool(tree),
             mmaplargeindex=True,
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -21,13 +21,20 @@ 
     util as interfaceutil,
 )
 from .utils import storageutil
+from .revlogutils import (
+    constants as revlog_constants,
+)
 
 
 @interfaceutil.implementer(repository.ifilestorage)
 class filelog(object):
     def __init__(self, opener, path):
         self._revlog = revlog.revlog(
-            opener, b'/'.join((b'data', path + b'.i')), censorable=True
+            opener,
+            # XXX should use the unencoded path
+            target=(revlog_constants.KIND_FILELOG, path),
+            indexfile=b'/'.join((b'data', path + b'.i')),
+            censorable=True,
         )
         # Full name of the user visible file, relative to the repository root.
         # Used by LFS.
diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -62,6 +62,10 @@ 
     stringutil,
 )
 
+from .revlogutils import (
+    constants as revlog_constants,
+)
+
 if pycompat.TYPE_CHECKING:
     from typing import (
         Any,
@@ -1434,8 +1438,12 @@ 
             raise error.CommandError(cmd, _(b'invalid arguments'))
         if not os.path.isfile(file_):
             raise error.InputError(_(b"revlog '%s' not found") % file_)
+
+        target = (revlog_constants.KIND_OTHER, b'free-form:%s' % file_)
         r = revlog.revlog(
-            vfsmod.vfs(encoding.getcwd(), audit=False), file_[:-2] + b".i"
+            vfsmod.vfs(encoding.getcwd(), audit=False),
+            target=target,
+            indexfile=file_[:-2] + b".i",
         )
     return r
 
diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -26,7 +26,10 @@ 
     dateutil,
     stringutil,
 )
-from .revlogutils import flagutil
+from .revlogutils import (
+    constants as revlog_constants,
+    flagutil,
+)
 
 _defaultextra = {b'branch': b'default'}
 
@@ -402,7 +405,8 @@ 
         revlog.revlog.__init__(
             self,
             opener,
-            indexfile,
+            target=(revlog_constants.KIND_CHANGELOG, None),
+            indexfile=indexfile,
             datafile=datafile,
             checkambig=True,
             mmaplargeindex=True,
diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py
--- a/mercurial/bundlerepo.py
+++ b/mercurial/bundlerepo.py
@@ -44,9 +44,13 @@ 
     vfs as vfsmod,
 )
 
+from .revlogutils import (
+    constants as revlog_constants,
+)
+
 
 class bundlerevlog(revlog.revlog):
-    def __init__(self, opener, indexfile, cgunpacker, linkmapper):
+    def __init__(self, opener, target, indexfile, cgunpacker, linkmapper):
         # How it works:
         # To retrieve a revision, we need to know the offset of the revision in
         # the bundle (an unbundle object). We store this offset in the index
@@ -55,7 +59,7 @@ 
         # To differentiate a rev in the bundle from a rev in the revlog, we
         # check revision against repotiprev.
         opener = vfsmod.readonlyvfs(opener)
-        revlog.revlog.__init__(self, opener, indexfile)
+        revlog.revlog.__init__(self, opener, target=target, indexfile=indexfile)
         self.bundle = cgunpacker
         n = len(self)
         self.repotiprev = n - 1
@@ -169,7 +173,12 @@ 
         changelog.changelog.__init__(self, opener)
         linkmapper = lambda x: x
         bundlerevlog.__init__(
-            self, opener, self.indexfile, cgunpacker, linkmapper
+            self,
+            opener,
+            (revlog_constants.KIND_CHANGELOG, None),
+            self.indexfile,
+            cgunpacker,
+            linkmapper,
         )
 
 
@@ -185,7 +194,12 @@ 
     ):
         manifest.manifestrevlog.__init__(self, nodeconstants, opener, tree=dir)
         bundlerevlog.__init__(
-            self, opener, self.indexfile, cgunpacker, linkmapper
+            self,
+            opener,
+            (revlog_constants.KIND_MANIFESTLOG, dir),
+            self.indexfile,
+            cgunpacker,
+            linkmapper,
         )
         if dirlogstarts is None:
             dirlogstarts = {}
@@ -212,7 +226,12 @@ 
     def __init__(self, opener, path, cgunpacker, linkmapper):
         filelog.filelog.__init__(self, opener, path)
         self._revlog = bundlerevlog(
-            opener, self.indexfile, cgunpacker, linkmapper
+            opener,
+            # XXX should use the unencoded path
+            target=(revlog_constants.KIND_FILELOG, path),
+            indexfile=self.indexfile,
+            cgunpacker=cgunpacker,
+            linkmapper=linkmapper,
         )
 
 
diff --git a/contrib/undumprevlog b/contrib/undumprevlog
--- a/contrib/undumprevlog
+++ b/contrib/undumprevlog
@@ -15,6 +15,10 @@ 
 )
 from mercurial.utils import procutil
 
+from mercurial.revlogutils import (
+    constants as revlog_constants,
+)
+
 for fp in (sys.stdin, sys.stdout, sys.stderr):
     procutil.setbinary(fp)
 
@@ -28,7 +32,11 @@ 
         break
     if l.startswith("file:"):
         f = encoding.strtolocal(l[6:-1])
-        r = revlog.revlog(opener, f)
+        r = revlog.revlog(
+            opener,
+            target=(revlog_constants.KIND_OTHER, b'undump-revlog'),
+            indexfile=f,
+        )
         procutil.stdout.write(b'%s\n' % f)
     elif l.startswith("node:"):
         n = bin(l[6:-1])
diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -66,6 +66,8 @@ 
 import tempfile
 import threading
 import time
+
+import mercurial.revlog
 from mercurial import (
     changegroup,
     cmdutil,
@@ -76,7 +78,6 @@ 
     hg,
     mdiff,
     merge,
-    revlog,
     util,
 )
 
@@ -119,6 +120,21 @@ 
 except ImportError:
     profiling = None
 
+try:
+    from mercurial.revlogutils import constants as revlog_constants
+
+    perf_rl_kind = (revlog_constants.KIND_OTHER, b'created-by-perf')
+
+    def revlog(opener, *args, **kwargs):
+        return mercurial.revlog.revlog(opener, perf_rl_kind, *args, **kwargs)
+
+
+except (ImportError, AttributeError):
+    perf_rl_kind = None
+
+    def revlog(opener, *args, **kwargs):
+        return mercurial.revlog.revlog(opener, *args, **kwargs)
+
 
 def identity(a):
     return a
@@ -1803,7 +1819,8 @@ 
 
     mercurial.revlog._prereadsize = 2 ** 24  # disable lazy parser in old hg
     n = scmutil.revsingle(repo, rev).node()
-    cl = mercurial.revlog.revlog(getsvfs(repo), b"00changelog.i")
+
+    cl = revlog(getsvfs(repo), indexfile=b"00changelog.i")
 
     def d():
         cl.rev(n)
@@ -2592,7 +2609,7 @@ 
     header = struct.unpack(b'>I', data[0:4])[0]
     version = header & 0xFFFF
     if version == 1:
-        revlogio = revlog.revlogio()
+        revlogio = mercurial.revlog.revlogio()
         inline = header & (1 << 16)
     else:
         raise error.Abort(b'unsupported revlog version: %d' % version)
@@ -2611,7 +2628,7 @@ 
     allnodesrev = list(reversed(allnodes))
 
     def constructor():
-        revlog.revlog(opener, indexfile)
+        revlog(opener, indexfile=indexfile)
 
     def read():
         with opener(indexfile) as fh:
@@ -3037,7 +3054,7 @@ 
         vfs = vfsmod.vfs(tmpdir)
         vfs.options = getattr(orig.opener, 'options', None)
 
-        dest = revlog.revlog(
+        dest = revlog(
             vfs, indexfile=indexname, datafile=dataname, **revlogkwargs
         )
         if dest._inline:
diff --git a/contrib/dumprevlog b/contrib/dumprevlog
--- a/contrib/dumprevlog
+++ b/contrib/dumprevlog
@@ -13,6 +13,10 @@ 
 )
 from mercurial.utils import procutil
 
+from mercurial.revlogutils import (
+    constants as revlog_constants,
+)
+
 for fp in (sys.stdin, sys.stdout, sys.stderr):
     procutil.setbinary(fp)
 
@@ -32,7 +36,11 @@ 
 
 
 for f in sys.argv[1:]:
-    r = revlog.revlog(binopen, encoding.strtolocal(f))
+    r = revlog.revlog(
+        binopen,
+        target=(revlog_constants.KIND_OTHER, b'dump-revlog'),
+        indexfile=encoding.strtolocal(f),
+    )
     print("file:", f)
     for i in r:
         n = r.node(i)