Patchwork D10315: store: also return some information about the type of file `walk` found

login
register
mail settings
Submitter phabricator
Date April 6, 2021, 8:40 a.m.
Message ID <differential-rev-PHID-DREV-atv4ei72ai5jaeltpkrs-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48634/
State Superseded
Headers show

Comments

phabricator - April 6, 2021, 8:40 a.m.
marmoute created this revision.
Herald added a reviewer: durin42.
Herald added a reviewer: martinvonz.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  We start returning of 4th information in the `store.walk` return tuple: the type of the file. This will make it easier for caller to determine which kind of file they are looking at. This should especically help with the `upgrade-repo` code that has to do a lot of fragile index's file name comparison.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10315

AFFECTED FILES
  hgext/largefiles/lfutil.py
  hgext/largefiles/reposetup.py
  hgext/narrow/narrowcommands.py
  hgext/remotefilelog/contentstore.py
  hgext/remotefilelog/remotefilelogserver.py
  mercurial/repair.py
  mercurial/store.py
  mercurial/streamclone.py
  mercurial/upgrade_utils/engine.py
  mercurial/verify.py
  mercurial/wireprotov2server.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS




To: marmoute, durin42, martinvonz, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -743,13 +743,13 @@ 
 
   $ hg clone -U --stream --config ui.ssh="\"$PYTHON\" \"$TESTDIR/dummyssh\"" ssh://user@dummy/test-repo stream-clone --debug | egrep '00(changelog|manifest)'
   adding [s] 00manifest.n (70 bytes)
-  adding [s] 00manifest.i (313 KB)
   adding [s] 00manifest.d (452 KB)
   adding [s] 00manifest-*.nd (118 KB) (glob)
   adding [s] 00changelog.n (70 bytes)
-  adding [s] 00changelog.i (313 KB)
   adding [s] 00changelog.d (360 KB)
   adding [s] 00changelog-*.nd (118 KB) (glob)
+  adding [s] 00manifest.i (313 KB)
+  adding [s] 00changelog.i (313 KB)
   $ ls -1 stream-clone/.hg/store/ | egrep '00(changelog|manifest)(\.n|-.*\.nd)'
   00changelog-*.nd (glob)
   00changelog.n
diff --git a/mercurial/wireprotov2server.py b/mercurial/wireprotov2server.py
--- a/mercurial/wireprotov2server.py
+++ b/mercurial/wireprotov2server.py
@@ -1582,7 +1582,8 @@ 
 
     # TODO this is a bunch of storage layer interface abstractions because
     # it assumes revlogs.
-    for name, encodedname, size in topfiles:
+    for rl_type, name, encodedname, size in topfiles:
+        # XXX use the `rl_type` for that
         if b'changelog' in files and name.startswith(b'00changelog'):
             pass
         elif b'manifestlog' in files and name.startswith(b'00manifest'):
diff --git a/mercurial/verify.py b/mercurial/verify.py
--- a/mercurial/verify.py
+++ b/mercurial/verify.py
@@ -416,7 +416,7 @@ 
             storefiles = set()
             subdirs = set()
             revlogv1 = self.revlogv1
-            for f, f2, size in repo.store.datafiles():
+            for t, f, f2, size in repo.store.datafiles():
                 if not f:
                     self._err(None, _(b"cannot decode filename '%s'") % f2)
                 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
@@ -480,7 +480,7 @@ 
         ui.status(_(b"checking files\n"))
 
         storefiles = set()
-        for f, f2, size in repo.store.datafiles():
+        for rl_type, f, f2, size in repo.store.datafiles():
             if not f:
                 self._err(None, _(b"cannot decode filename '%s'") % f2)
             elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
diff --git a/mercurial/upgrade_utils/engine.py b/mercurial/upgrade_utils/engine.py
--- a/mercurial/upgrade_utils/engine.py
+++ b/mercurial/upgrade_utils/engine.py
@@ -192,7 +192,7 @@ 
 
     # Perform a pass to collect metadata. This validates we can open all
     # source files and allows a unified progress bar to be displayed.
-    for unencoded, encoded, size in alldatafiles:
+    for revlog_type, unencoded, encoded, size in alldatafiles:
         if not unencoded.endswith(b'.i'):
             continue
 
diff --git a/mercurial/streamclone.py b/mercurial/streamclone.py
--- a/mercurial/streamclone.py
+++ b/mercurial/streamclone.py
@@ -243,7 +243,7 @@ 
     # Get consistent snapshot of repo, lock during scan.
     with repo.lock():
         repo.ui.debug(b'scanning\n')
-        for name, ename, size in _walkstreamfiles(repo):
+        for file_type, name, ename, size in _walkstreamfiles(repo):
             if size:
                 entries.append((name, size))
                 total_bytes += size
@@ -616,7 +616,7 @@ 
             matcher = narrowspec.match(repo.root, includes, excludes)
 
         repo.ui.debug(b'scanning\n')
-        for name, ename, size in _walkstreamfiles(repo, matcher):
+        for rl_type, name, ename, size in _walkstreamfiles(repo, matcher):
             if size:
                 entries.append((_srcstore, name, _fileappend, size))
                 totalfilesize += size
diff --git a/mercurial/store.py b/mercurial/store.py
--- a/mercurial/store.py
+++ b/mercurial/store.py
@@ -1,4 +1,4 @@ 
-# store.py - repository store handling for Mercurial
+# store.py - repository store handling for Mercurial, f), f)
 #
 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
 #
@@ -387,13 +387,44 @@ 
     b'requires',
 ]
 
-REVLOG_FILES_EXT = (b'.i', b'.d', b'.n', b'.nd')
+REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
+REVLOG_FILES_OTHER_EXT = (b'.d', b'.n', b'.nd', b'd.tmpcensored')
+
+
+def is_revlog(f, kind, st):
+    if kind != stat.S_IFREG:
+        return None
+    return revlog_type(f)
+
+
+def revlog_type(f):
+    if f.endswith(REVLOG_FILES_MAIN_EXT):
+        return FILEFLAGS_REVLOG_MAIN
+    elif f.endswith(REVLOG_FILES_OTHER_EXT):
+        return FILETYPE_FILELOG_OTHER
 
 
-def isrevlog(f, kind, st):
-    if kind != stat.S_IFREG:
-        return False
-    return f.endswith(REVLOG_FILES_EXT)
+# the file is part of changelog data
+FILEFLAGS_CHANGELOG = 1 << 13
+# the file is part of manifest data
+FILEFLAGS_MANIFESTLOG = 1 << 12
+# the file is part of filelog data
+FILEFLAGS_FILELOG = 1 << 11
+# file that are not directly part of a revlog
+FILEFLAGS_OTHER = 1 << 10
+
+# the main entry point for a revlog
+FILEFLAGS_REVLOG_MAIN = 1 << 1
+# a secondary file for a revlog
+FILEFLAGS_REVLOG_OTHER = 1 << 0
+
+FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
+FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
+FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
+FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
+FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
+FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
+FILETYPE_OTHER = FILEFLAGS_OTHER
 
 
 class basicstore(object):
@@ -425,9 +456,10 @@ 
                 p = visit.pop()
                 for f, kind, st in readdir(p, stat=True):
                     fp = p + b'/' + f
-                    if isrevlog(f, kind, st):
+                    rl_type = is_revlog(f, kind, st)
+                    if rl_type is not None:
                         n = util.pconvert(fp[striplen:])
-                        l.append((decodedir(n), n, st.st_size))
+                        l.append((rl_type, decodedir(n), n, st.st_size))
                     elif kind == stat.S_IFDIR and recurse:
                         visit.append(fp)
         l.sort()
@@ -445,16 +477,25 @@ 
         return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
 
     def datafiles(self, matcher=None):
-        return self._walk(b'data', True) + self._walk(b'meta', True)
+        files = self._walk(b'data', True) + self._walk(b'meta', True)
+        for (t, u, e, s) in files:
+            yield (FILEFLAGS_FILELOG | t, u, e, s)
 
     def topfiles(self):
         # yield manifest before changelog
-        return reversed(self._walk(b'', False))
+        files = reversed(self._walk(b'', False))
+        for (t, u, e, s) in files:
+            if u.startswith(b'00changelog'):
+                yield (FILEFLAGS_CHANGELOG | t, u, e, s)
+            elif u.startswith(b'00manifest'):
+                yield (FILEFLAGS_MANIFESTLOG | t, u, e, s)
+            else:
+                yield (FILETYPE_OTHER | t, u, e, s)
 
     def walk(self, matcher=None):
         """return file related to data storage (ie: revlogs)
 
-        yields (unencoded, encoded, size)
+        yields (file_type, unencoded, encoded, size)
 
         if a matcher is passed, storage files of only those tracked paths
         are passed with matches the matcher
@@ -500,14 +541,14 @@ 
         self.opener = self.vfs
 
     def datafiles(self, matcher=None):
-        for a, b, size in super(encodedstore, self).datafiles():
+        for t, a, b, size in super(encodedstore, self).datafiles():
             try:
                 a = decodefilename(a)
             except KeyError:
                 a = None
             if a is not None and not _matchtrackedpath(a, matcher):
                 continue
-            yield a, b, size
+            yield t, a, b, size
 
     def join(self, f):
         return self.path + b'/' + encodefilename(f)
@@ -696,7 +737,9 @@ 
                 continue
             ef = self.encode(f)
             try:
-                yield f, ef, self.getsize(ef)
+                t = revlog_type(f)
+                t |= FILEFLAGS_FILELOG
+                yield t, f, ef, self.getsize(ef)
             except OSError as err:
                 if err.errno != errno.ENOENT:
                     raise
diff --git a/mercurial/repair.py b/mercurial/repair.py
--- a/mercurial/repair.py
+++ b/mercurial/repair.py
@@ -428,7 +428,7 @@ 
     if scmutil.istreemanifest(repo):
         # This logic is safe if treemanifest isn't enabled, but also
         # pointless, so we skip it if treemanifest isn't enabled.
-        for unencoded, encoded, size in repo.store.datafiles():
+        for t, unencoded, encoded, size in repo.store.datafiles():
             if unencoded.startswith(b'meta/') and unencoded.endswith(
                 b'00manifest.i'
             ):
diff --git a/hgext/remotefilelog/remotefilelogserver.py b/hgext/remotefilelog/remotefilelogserver.py
--- a/hgext/remotefilelog/remotefilelogserver.py
+++ b/hgext/remotefilelog/remotefilelogserver.py
@@ -164,24 +164,26 @@ 
                                 b'.d'
                             ):
                                 n = util.pconvert(fp[striplen:])
-                                yield (store.decodedir(n), n, st.st_size)
+                                d = store.decodedir(n)
+                                t = store.FILETYPE_OTHER
+                                yield (t, d, n, st.st_size)
                         if kind == stat.S_IFDIR:
                             visit.append(fp)
 
             if scmutil.istreemanifest(repo):
-                for (u, e, s) in repo.store.datafiles():
+                for (t, u, e, s) in repo.store.datafiles():
                     if u.startswith(b'meta/') and (
                         u.endswith(b'.i') or u.endswith(b'.d')
                     ):
-                        yield (u, e, s)
+                        yield (t, u, e, s)
 
             # Return .d and .i files that do not match the shallow pattern
             match = state.match
             if match and not match.always():
-                for (u, e, s) in repo.store.datafiles():
+                for (t, u, e, s) in repo.store.datafiles():
                     f = u[5:-2]  # trim data/...  and .i/.d
                     if not state.match(f):
-                        yield (u, e, s)
+                        yield (t, u, e, s)
 
             for x in repo.store.topfiles():
                 if state.noflatmf and x[0][:11] == b'00manifest.':
diff --git a/hgext/remotefilelog/contentstore.py b/hgext/remotefilelog/contentstore.py
--- a/hgext/remotefilelog/contentstore.py
+++ b/hgext/remotefilelog/contentstore.py
@@ -365,7 +365,7 @@ 
             ledger.markdataentry(self, treename, node)
             ledger.markhistoryentry(self, treename, node)
 
-        for path, encoded, size in self._store.datafiles():
+        for t, path, encoded, size in self._store.datafiles():
             if path[:5] != b'meta/' or path[-2:] != b'.i':
                 continue
 
diff --git a/hgext/narrow/narrowcommands.py b/hgext/narrow/narrowcommands.py
--- a/hgext/narrow/narrowcommands.py
+++ b/hgext/narrow/narrowcommands.py
@@ -276,7 +276,7 @@ 
                 repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
 
         todelete = []
-        for f, f2, size in repo.store.datafiles():
+        for t, f, f2, size in repo.store.datafiles():
             if f.startswith(b'data/'):
                 file = f[5:-2]
                 if not newmatch(file):
diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py
--- a/hgext/largefiles/reposetup.py
+++ b/hgext/largefiles/reposetup.py
@@ -445,7 +445,7 @@ 
 
     def checkrequireslfiles(ui, repo, **kwargs):
         if b'largefiles' not in repo.requirements and any(
-            lfutil.shortname + b'/' in f[0] for f in repo.store.datafiles()
+            lfutil.shortname + b'/' in f[1] for f in repo.store.datafiles()
         ):
             repo.requirements.add(b'largefiles')
             scmutil.writereporequirements(repo)
diff --git a/hgext/largefiles/lfutil.py b/hgext/largefiles/lfutil.py
--- a/hgext/largefiles/lfutil.py
+++ b/hgext/largefiles/lfutil.py
@@ -514,7 +514,7 @@ 
 def islfilesrepo(repo):
     '''Return true if the repo is a largefile repo.'''
     if b'largefiles' in repo.requirements and any(
-        shortnameslash in f[0] for f in repo.store.datafiles()
+        shortnameslash in f[1] for f in repo.store.datafiles()
     ):
         return True