Patchwork D10576: revlog: use a "radix" to address revlog

login
register
mail settings
Submitter phabricator
Date May 3, 2021, 12:07 p.m.
Message ID <differential-rev-PHID-DREV-nws7bdhwxumegnakzs5w-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48904/
State Superseded
Headers show

Comments

phabricator - May 3, 2021, 12:07 p.m.
marmoute created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Instead of pointing to the index directly and to derive the other file from
  that, we directly provide the radix and let the revlog determine the associated
  file path internally. This is more robust and will give us more flexibility for
  picking this file name in the future.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10576

AFFECTED FILES
  contrib/dumprevlog
  contrib/perf.py
  contrib/undumprevlog
  hgext/remotefilelog/contentstore.py
  mercurial/bundlerepo.py
  mercurial/changelog.py
  mercurial/cmdutil.py
  mercurial/filelog.py
  mercurial/manifest.py
  mercurial/revlog.py
  mercurial/unionrepo.py
  tests/test-revlog-raw.py
  tests/test-revlog.t

CHANGE DETAILS




To: marmoute, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/tests/test-revlog.t b/tests/test-revlog.t
--- a/tests/test-revlog.t
+++ b/tests/test-revlog.t
@@ -49,6 +49,6 @@ 
   >>> from mercurial import revlog, vfs
   >>> tvfs = vfs.vfs(b'.')
   >>> tvfs.options = {b'revlogv1': True}
-  >>> rl = revlog.revlog(tvfs, target=(KIND_OTHER, b'test'), indexfile=b'a.i')
+  >>> rl = revlog.revlog(tvfs, target=(KIND_OTHER, b'test'), radix=b'a')
   >>> rl.revision(1)
   mpatchError(*'patch cannot be decoded'*) (glob)
diff --git a/tests/test-revlog-raw.py b/tests/test-revlog-raw.py
--- a/tests/test-revlog-raw.py
+++ b/tests/test-revlog-raw.py
@@ -79,12 +79,11 @@ 
     return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
 
 
-def newrevlog(name=b'_testrevlog.i', recreate=False):
+def newrevlog(name=b'_testrevlog', recreate=False):
     if recreate:
-        tvfs.tryunlink(name)
-    rlog = revlog.revlog(
-        tvfs, target=(constants.KIND_OTHER, b'test'), indexfile=name
-    )
+        tvfs.tryunlink(name + b'.i')
+    target = (constants.KIND_OTHER, b'test')
+    rlog = revlog.revlog(tvfs, target=target, radix=name)
     return rlog
 
 
@@ -112,7 +111,7 @@ 
         rlog._storedeltachains = True
 
 
-def addgroupcopy(rlog, tr, destname=b'_destrevlog.i', optimaldelta=True):
+def addgroupcopy(rlog, tr, destname=b'_destrevlog', optimaldelta=True):
     """Copy revlog to destname using revlog.addgroup. Return the copied revlog.
 
     This emulates push or pull. They use changegroup. Changegroup requires
@@ -177,7 +176,7 @@ 
     return dlog
 
 
-def lowlevelcopy(rlog, tr, destname=b'_destrevlog.i'):
+def lowlevelcopy(rlog, tr, destname=b'_destrevlog'):
     """Like addgroupcopy, but use the low level revlog._addrevision directly.
 
     It exercises some code paths that are hard to reach easily otherwise.
@@ -427,7 +426,7 @@ 
 
 
 def makesnapshot(tr):
-    rl = newrevlog(name=b'_snaprevlog3.i', recreate=True)
+    rl = newrevlog(name=b'_snaprevlog3', recreate=True)
     for i in data:
         appendrev(rl, i, tr)
     return rl
@@ -483,7 +482,7 @@ 
         checkrevlog(rl2, expected)
         print('addgroupcopy test passed')
         # Copy via revlog.clone
-        rl3 = newrevlog(name=b'_destrevlog3.i', recreate=True)
+        rl3 = newrevlog(name=b'_destrevlog3', recreate=True)
         rl.clone(tr, rl3)
         checkrevlog(rl3, expected)
         print('clone test passed')
diff --git a/mercurial/unionrepo.py b/mercurial/unionrepo.py
--- a/mercurial/unionrepo.py
+++ b/mercurial/unionrepo.py
@@ -33,7 +33,7 @@ 
 
 
 class unionrevlog(revlog.revlog):
-    def __init__(self, opener, indexfile, revlog2, linkmapper):
+    def __init__(self, opener, radix, revlog2, linkmapper):
         # How it works:
         # To retrieve a revision, we just need to know the node id so we can
         # look it up in revlog2.
@@ -45,7 +45,7 @@ 
         if target is None:
             # a revlog wrapper, eg: the manifestlog that is not an actual revlog
             target = revlog2._revlog.target
-        revlog.revlog.__init__(self, opener, target=target, indexfile=indexfile)
+        revlog.revlog.__init__(self, opener, target=target, radix=radix)
         self.revlog2 = revlog2
 
         n = len(self)
@@ -164,9 +164,7 @@ 
         changelog.changelog.__init__(self, opener)
         linkmapper = None
         changelog2 = changelog.changelog(opener2)
-        unionrevlog.__init__(
-            self, opener, self._indexfile, changelog2, linkmapper
-        )
+        unionrevlog.__init__(self, opener, self.radix, changelog2, linkmapper)
 
 
 class unionmanifest(unionrevlog, manifest.manifestrevlog):
@@ -174,7 +172,7 @@ 
         manifest.manifestrevlog.__init__(self, nodeconstants, opener)
         manifest2 = manifest.manifestrevlog(nodeconstants, opener2)
         unionrevlog.__init__(
-            self, opener, self._revlog._indexfile, manifest2, linkmapper
+            self, opener, self._revlog.radix, manifest2, linkmapper
         )
 
 
@@ -183,7 +181,7 @@ 
         filelog.filelog.__init__(self, opener, path)
         filelog2 = filelog.filelog(opener2, path)
         self._revlog = unionrevlog(
-            opener, self._revlog._indexfile, filelog2._revlog, linkmapper
+            opener, self._revlog.radix, filelog2._revlog, linkmapper
         )
         self._repo = repo
         self.repotiprev = self._revlog.repotiprev
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -289,9 +289,8 @@ 
         self,
         opener,
         target,
+        radix,
         postfix=None,
-        indexfile=None,
-        datafile=None,
         checkambig=False,
         mmaplargeindex=False,
         censorable=False,
@@ -313,16 +312,19 @@ 
         accurate value.
         """
         self.upperboundcomp = upperboundcomp
-        if not indexfile.endswith(b'.i'):
-            raise error.ProgrammingError(
-                b"revlog's indexfile should end with `.i`"
-            )
-        if datafile is None:
-            datafile = indexfile[:-2] + b".d"
-            if postfix is not None:
-                datafile = b'%s.%s' % (datafile, postfix)
-        if postfix is not None:
-            indexfile = b'%s.%s' % (indexfile, postfix)
+
+        self.radix = radix
+
+        if postfix is None:
+            indexfile = b'%s.i' % self.radix
+            datafile = b'%s.d' % self.radix
+        elif postfix == b'a':
+            indexfile = b'%s.i.a' % self.radix
+            datafile = b'%s.d' % self.radix
+        else:
+            indexfile = b'%s.i.%s' % (self.radix, postfix)
+            datafile = b'%s.d.%s' % (self.radix, postfix)
+
         self._indexfile = indexfile
         self._datafile = datafile
         self.nodemap_file = None
@@ -2900,8 +2902,8 @@ 
         newrl = revlog(
             self.opener,
             target=self.target,
+            radix=self.radix,
             postfix=b'tmpcensored',
-            indexfile=self._indexfile,
             censorable=True,
         )
         newrl._format_version = self._format_version
diff --git a/mercurial/manifest.py b/mercurial/manifest.py
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -1562,7 +1562,6 @@ 
         opener,
         tree=b'',
         dirlogcache=None,
-        indexfile=None,
         treemanifest=False,
     ):
         """Constructs a new manifest revlog
@@ -1593,10 +1592,9 @@ 
         if tree:
             assert self._treeondisk, b'opts is %r' % opts
 
-        if indexfile is None:
-            indexfile = b'00manifest.i'
-            if tree:
-                indexfile = b"meta/" + tree + indexfile
+        radix = b'00manifest'
+        if tree:
+            radix = b"meta/" + tree + radix
 
         self.tree = tree
 
@@ -1609,7 +1607,7 @@ 
         self._revlog = revlog.revlog(
             opener,
             target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
-            indexfile=indexfile,
+            radix=radix,
             # only root indexfile is cached
             checkambig=not bool(tree),
             mmaplargeindex=True,
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -30,7 +30,7 @@ 
             opener,
             # XXX should use the unencoded path
             target=(revlog_constants.KIND_FILELOG, path),
-            indexfile=b'/'.join((b'data', path + b'.i')),
+            radix=b'/'.join((b'data', path)),
             censorable=True,
         )
         # Full name of the user visible file, relative to the repository root.
diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -1437,7 +1437,7 @@ 
         r = revlog.revlog(
             vfsmod.vfs(encoding.getcwd(), audit=False),
             target=target,
-            indexfile=file_[:-2] + b".i",
+            radix=file_[:-2],
         )
     return r
 
diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -396,20 +396,17 @@ 
         the documentation there.
         """
 
-        indexfile = b'00changelog.i'
         if trypending and opener.exists(b'00changelog.i.a'):
             postfix = b'a'
         else:
             postfix = None
 
-        datafile = b'00changelog.d'
         revlog.revlog.__init__(
             self,
             opener,
             target=(revlog_constants.KIND_CHANGELOG, None),
+            radix=b'00changelog',
             postfix=postfix,
-            indexfile=indexfile,
-            datafile=datafile,
             checkambig=True,
             mmaplargeindex=True,
             persistentnodemap=opener.options.get(b'persistent-nodemap', False),
diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py
--- a/mercurial/bundlerepo.py
+++ b/mercurial/bundlerepo.py
@@ -52,7 +52,7 @@ 
 
 
 class bundlerevlog(revlog.revlog):
-    def __init__(self, opener, target, indexfile, cgunpacker, linkmapper):
+    def __init__(self, opener, target, radix, cgunpacker, linkmapper):
         # How it works:
         # To retrieve a revision, we need to know the offset of the revision in
         # the bundle (an unbundle object). We store this offset in the index
@@ -61,7 +61,7 @@ 
         # To differentiate a rev in the bundle from a rev in the revlog, we
         # check revision against repotiprev.
         opener = vfsmod.readonlyvfs(opener)
-        revlog.revlog.__init__(self, opener, target=target, indexfile=indexfile)
+        revlog.revlog.__init__(self, opener, target=target, radix=radix)
         self.bundle = cgunpacker
         n = len(self)
         self.repotiprev = n - 1
@@ -180,7 +180,7 @@ 
             self,
             opener,
             (revlog_constants.KIND_CHANGELOG, None),
-            self._indexfile,
+            self.radix,
             cgunpacker,
             linkmapper,
         )
@@ -201,7 +201,7 @@ 
             self,
             opener,
             (revlog_constants.KIND_MANIFESTLOG, dir),
-            self._revlog._indexfile,
+            self._revlog.radix,
             cgunpacker,
             linkmapper,
         )
@@ -233,7 +233,7 @@ 
             opener,
             # XXX should use the unencoded path
             target=(revlog_constants.KIND_FILELOG, path),
-            indexfile=self._revlog._indexfile,
+            radix=self._revlog.radix,
             cgunpacker=cgunpacker,
             linkmapper=linkmapper,
         )
diff --git a/hgext/remotefilelog/contentstore.py b/hgext/remotefilelog/contentstore.py
--- a/hgext/remotefilelog/contentstore.py
+++ b/hgext/remotefilelog/contentstore.py
@@ -281,7 +281,7 @@ 
         self._store = repo.store
         self._svfs = repo.svfs
         self._revlogs = dict()
-        self._cl = revlog.revlog(self._svfs, indexfile=b'00changelog.i')
+        self._cl = revlog.revlog(self._svfs, radix=b'00changelog.i')
         self._repackstartlinkrev = 0
 
     def get(self, name, node):
@@ -341,10 +341,10 @@ 
     def _revlog(self, name):
         rl = self._revlogs.get(name)
         if rl is None:
-            revlogname = b'00manifesttree.i'
+            revlogname = b'00manifesttree'
             if name != b'':
-                revlogname = b'meta/%s/00manifest.i' % name
-            rl = revlog.revlog(self._svfs, indexfile=revlogname)
+                revlogname = b'meta/%s/00manifest' % name
+            rl = revlog.revlog(self._svfs, radix=revlogname)
             self._revlogs[name] = rl
         return rl
 
@@ -365,7 +365,7 @@ 
         if options and options.get(constants.OPTION_PACKSONLY):
             return
         treename = b''
-        rl = revlog.revlog(self._svfs, indexfile=b'00manifesttree.i')
+        rl = revlog.revlog(self._svfs, radix=b'00manifesttree')
         startlinkrev = self._repackstartlinkrev
         endlinkrev = self._repackendlinkrev
         for rev in pycompat.xrange(len(rl) - 1, -1, -1):
@@ -382,9 +382,9 @@ 
             if path[:5] != b'meta/' or path[-2:] != b'.i':
                 continue
 
-            treename = path[5 : -len(b'/00manifest.i')]
+            treename = path[5 : -len(b'/00manifest')]
 
-            rl = revlog.revlog(self._svfs, indexfile=path)
+            rl = revlog.revlog(self._svfs, indexfile=path[:-2])
             for rev in pycompat.xrange(len(rl) - 1, -1, -1):
                 linkrev = rl.linkrev(rev)
                 if linkrev < startlinkrev:
diff --git a/contrib/undumprevlog b/contrib/undumprevlog
--- a/contrib/undumprevlog
+++ b/contrib/undumprevlog
@@ -32,10 +32,11 @@ 
         break
     if l.startswith("file:"):
         f = encoding.strtolocal(l[6:-1])
+        assert f.endswith(b'.i')
         r = revlog.revlog(
             opener,
             target=(revlog_constants.KIND_OTHER, b'undump-revlog'),
-            indexfile=f,
+            radix=f[:-2],
         )
         procutil.stdout.write(b'%s\n' % f)
     elif l.startswith("node:"):
diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -1826,7 +1826,10 @@ 
     mercurial.revlog._prereadsize = 2 ** 24  # disable lazy parser in old hg
     n = scmutil.revsingle(repo, rev).node()
 
-    cl = revlog(getsvfs(repo), indexfile=b"00changelog.i")
+    try:
+        cl = revlog(getsvfs(repo), radix=b"00changelog")
+    except TypeError:
+        cl = revlog(getsvfs(repo), indexfile=b"00changelog.i")
 
     def d():
         cl.rev(n)
@@ -2610,6 +2613,7 @@ 
 
     opener = getattr(rl, 'opener')  # trick linter
     # compat with hg <= 5.8
+    radix = getattr(rl, 'radix', None)
     indexfile = getattr(rl, '_indexfile', None)
     if indexfile is None:
         # compatibility with <= hg-5.8
@@ -2641,7 +2645,11 @@ 
     allnodesrev = list(reversed(allnodes))
 
     def constructor():
-        revlog(opener, indexfile=indexfile)
+        if radix is not None:
+            revlog(opener, radix=radix)
+        else:
+            # hg <= 5.8
+            revlog(opener, indexfile=indexfile)
 
     def read():
         with opener(indexfile) as fh:
@@ -3043,8 +3051,9 @@ 
 
     datafile = getattr(orig, '_datafile', getattr(orig, 'datafile'))
     origdatapath = orig.opener.join(datafile)
-    indexname = 'revlog.i'
-    dataname = 'revlog.d'
+    radix = b'revlog'
+    indexname = b'revlog.i'
+    dataname = b'revlog.d'
 
     tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-')
     try:
@@ -3069,9 +3078,12 @@ 
         vfs = vfsmod.vfs(tmpdir)
         vfs.options = getattr(orig.opener, 'options', None)
 
-        dest = revlog(
-            vfs, indexfile=indexname, datafile=dataname, **revlogkwargs
-        )
+        try:
+            dest = revlog(vfs, radix=radix, **revlogkwargs)
+        except TypeError:
+            dest = revlog(
+                vfs, indexfile=indexname, datafile=dataname, **revlogkwargs
+            )
         if dest._inline:
             raise error.Abort('not supporting inline revlog (yet)')
         # make sure internals are initialized
diff --git a/contrib/dumprevlog b/contrib/dumprevlog
--- a/contrib/dumprevlog
+++ b/contrib/dumprevlog
@@ -36,10 +36,15 @@ 
 
 
 for f in sys.argv[1:]:
+    localf = encoding.strtolocal(f)
+    if not localf.endswith(b'.i'):
+        print("file:", f, file=sys.stderr)
+        print("  invalida filename", file=sys.stderr)
+
     r = revlog.revlog(
         binopen,
         target=(revlog_constants.KIND_OTHER, b'dump-revlog'),
-        indexfile=encoding.strtolocal(f),
+        radix=localf[:-2],
     )
     print("file:", f)
     for i in r: