Patchwork D9237: transaction: only keep file names in-memory for journal [WIP]

Submitter phabricator
Date Oct. 21, 2020, 9:45 p.m.
Message ID <differential-rev-PHID-DREV-qozzwcjywakmcchl4nkr-req@mercurial-scm.org>
Permalink /patch/47503/
State Superseded

Comments

phabricator - Oct. 21, 2020, 9:45 p.m.
joerg.sonnenberger created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  The truncation offsets are normally only needed during rollback, and in
  that case they can be read back from the on-disk journal. The one
  exception is currently the migration from an inline to a non-inline
  revlog; this iteration handles it by scanning the on-disk journal and
  computing the last revision from the recorded offset.
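
  In short, the journal on disk already records one "<file>\0<offset>\n"
  line per append-only file, so the transaction only needs to keep the set
  of journalled file names in memory and can re-read the offsets when a
  rollback (or the inline-to-non-inline split) actually needs them. A
  minimal standalone sketch of that re-read, using hypothetical names
  rather than Mercurial's API:

    def read_journal_offsets(journal_path):
        """Return the latest recorded truncation offset per journalled file."""
        offsets = {}
        with open(journal_path, 'rb') as fp:
            for line in fp:
                name, offset = line.rstrip(b'\n').split(b'\0')
                # a later record for the same file (written by replace())
                # supersedes the earlier one
                offsets[name] = int(offset)
        return offsets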

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D9237

AFFECTED FILES
  mercurial/repair.py
  mercurial/revlog.py
  mercurial/transaction.py
  tests/test-mq-qpush-fail.t

CHANGE DETAILS




To: joerg.sonnenberger, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/tests/test-mq-qpush-fail.t b/tests/test-mq-qpush-fail.t
--- a/tests/test-mq-qpush-fail.t
+++ b/tests/test-mq-qpush-fail.t
@@ -45,7 +45,7 @@ 
   >     # Touching files truncated at "transaction.abort" causes
   >     # forcible re-loading invalidated filecache properties
   >     # (including repo.changelog)
-  >     for f, o, _ignore in entries:
+  >     for f, o in entries:
   >         if o or not unlink:
   >             os.utime(opener.join(f), (0.0, 0.0))
   > def extsetup(ui):
diff --git a/mercurial/transaction.py b/mercurial/transaction.py
--- a/mercurial/transaction.py
+++ b/mercurial/transaction.py
@@ -58,7 +58,7 @@ 
     unlink=True,
     checkambigfiles=None,
 ):
-    for f, o, _ignore in entries:
+    for f, o in entries:
         if o or not unlink:
             checkambig = checkambigfiles and (f, b'') in checkambigfiles
             try:
@@ -160,8 +160,7 @@ 
         vfsmap[b''] = opener  # set default value
         self._vfsmap = vfsmap
         self._after = after
-        self._entries = []
-        self._map = {}
+        self._map = set()
         self._journal = journalname
         self._undoname = undoname
         self._queue = []
@@ -182,7 +181,7 @@ 
 
         # a dict of arguments to be passed to hooks
         self.hookargs = {}
-        self._file = opener.open(self._journal, b"w")
+        self._file = opener.open(self._journal, b"w+")
 
         # a list of ('location', 'path', 'backuppath', cache) entries.
         # - if 'backuppath' is empty, no file existed at backup time
@@ -245,26 +244,25 @@ 
         This is used by strip to delay vision of strip offset. The transaction
         sees either none or all of the strip actions to be done."""
         q = self._queue.pop()
-        for f, o, data in q:
-            self._addentry(f, o, data)
+        for f, o in q:
+            self._addentry(f, o)
 
     @active
-    def add(self, file, offset, data=None):
+    def add(self, file, offset):
         """record the state of an append-only file before update"""
         if file in self._map or file in self._backupmap:
             return
         if self._queue:
-            self._queue[-1].append((file, offset, data))
+            self._queue[-1].append((file, offset))
             return
 
-        self._addentry(file, offset, data)
+        self._addentry(file, offset)
 
-    def _addentry(self, file, offset, data):
+    def _addentry(self, file, offset):
         """add a append-only entry to memory and on-disk state"""
         if file in self._map or file in self._backupmap:
             return
-        self._entries.append((file, offset, data))
-        self._map[file] = len(self._entries) - 1
+        self._map.add(file)
         # add enough data to the journal to do the truncate
         self._file.write(b"%s\0%d\n" % (file, offset))
         self._file.flush()
@@ -397,15 +395,19 @@ 
         return any
 
     @active
-    def find(self, file):
-        if file in self._map:
-            return self._entries[self._map[file]]
-        if file in self._backupmap:
-            return self._backupentries[self._backupmap[file]]
-        return None
+    def findjournaloffset(self, file):
+        if file not in self._map:
+            return None
+        self._file.seek(0)
+        offset = None
+        for l in self._file:
+            f, o = l.split(b'\0')
+            if f == file:
+                offset = int(o)
+        return offset
 
     @active
-    def replace(self, file, offset, data=None):
+    def replace(self, file, offset):
         '''
         replace can only replace already committed entries
         that are not pending in the queue
@@ -413,8 +415,6 @@ 
 
         if file not in self._map:
             raise KeyError(file)
-        index = self._map[file]
-        self._entries[index] = (file, offset, data)
         self._file.write(b"%s\0%d\n" % (file, offset))
         self._file.flush()
 
@@ -554,7 +554,7 @@ 
                     self._report(
                         b"couldn't remove %s: %s\n" % (vfs.join(b), inst)
                     )
-        self._entries = []
+        self._map = set()
         self._writeundo()
         if self._after:
             self._after()
@@ -633,11 +633,17 @@ 
     def _abort(self):
         self._count = 0
         self._usages = 0
+        mapping = []
+        if self._map:
+            self._file.seek(0)
+            for l in self._file:
+                f, o = l.split(b'\0')
+                mapping.append((f, int(o)))
         self._file.close()
         self._backupsfile.close()
 
         try:
-            if not self._entries and not self._backupentries:
+            if not self._map and not self._backupentries:
                 if self._backupjournal:
                     self._opener.unlink(self._backupjournal)
                 if self._journal:
@@ -656,7 +662,7 @@ 
                     self._report,
                     self._opener,
                     self._vfsmap,
-                    self._entries,
+                    mapping,
                     self._backupentries,
                     False,
                     checkambigfiles=self._checkambigfiles,
@@ -698,7 +704,7 @@ 
     for l in lines:
         try:
             f, o = l.split(b'\0')
-            entries.append((f, int(o), None))
+            entries.append((f, int(o)))
         except ValueError:
             report(
                 _(b"couldn't read journal entry %r!\n") % pycompat.bytestr(l)
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1999,22 +1999,15 @@ 
             or (self.start(tiprev) + self.length(tiprev)) < _maxinline
         ):
             return
-
-        trinfo = tr.find(self.indexfile)
+        tr.add(self.datafile, 0)
+
+        trinfo = tr.findjournaloffset(self.indexfile)
         if trinfo is None:
             raise error.RevlogError(
                 _(b"%s not found in the transaction") % self.indexfile
             )
-
-        trindex = trinfo[2]
-        if trindex is not None:
-            dataoff = self.start(trindex)
-        else:
-            # revlog was stripped at start of transaction, use all leftover data
-            trindex = len(self) - 1
-            dataoff = self.end(tiprev)
-
-        tr.add(self.datafile, dataoff)
+        troffset = trinfo
+        trindex = 0
 
         if fp:
             fp.flush()
@@ -2026,6 +2019,8 @@ 
         with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
             for r in self:
                 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
+                if troffset <= self.start(r):
+                    trindex = r
 
         with self._indexfp(b'w') as fp:
             self.version &= ~FLAG_INLINE_DATA
@@ -2361,7 +2356,7 @@ 
             ifh.write(entry)
         else:
             offset += curr * self._io.size
-            transaction.add(self.indexfile, offset, curr)
+            transaction.add(self.indexfile, offset)
             ifh.write(entry)
             ifh.write(data[0])
             ifh.write(data[1])
@@ -2397,10 +2392,10 @@ 
         ifh = self._indexfp(b"a+")
         isize = r * self._io.size
         if self._inline:
-            transaction.add(self.indexfile, end + isize, r)
+            transaction.add(self.indexfile, end + isize)
             dfh = None
         else:
-            transaction.add(self.indexfile, isize, r)
+            transaction.add(self.indexfile, isize)
             transaction.add(self.datafile, end)
             dfh = self._datafp(b"a+")
 
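With the per-entry payload gone from the transaction, the revision index
that the old code stashed as the data argument has to be recovered from the
journalled byte offset, which is what the scan over self.start(r) in the
loop above is for. As a generic illustration of mapping a byte offset back
to a revision given sorted per-revision start offsets (not the exact
condition used in the patch):

    import bisect

    def first_rev_at_or_after(starts, offset):
        """Return the index of the first revision whose data begins at or
        after `offset`, given sorted per-revision start offsets, or None."""
        i = bisect.bisect_left(starts, offset)
        return i if i < len(starts) else None

    # revisions starting at bytes 0, 120, 480 and 900 of the data stream
    assert first_rev_at_or_after([0, 120, 480, 900], 480) == 2
    assert first_rev_at_or_after([0, 120, 480, 900], 500) == 3
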
diff --git a/mercurial/repair.py b/mercurial/repair.py
--- a/mercurial/repair.py
+++ b/mercurial/repair.py
@@ -209,7 +209,7 @@ 
                 # transaction and makes assumptions that file storage is
                 # using append-only files. We'll need some kind of storage
                 # API to handle stripping for us.
-                offset = len(tr._entries)
+                before = tr._map.copy()
 
                 tr.startgroup()
                 cl.strip(striprev, tr)
@@ -219,12 +219,18 @@ 
                     repo.file(fn).strip(striprev, tr)
                 tr.endgroup()
 
-                for i in pycompat.xrange(offset, len(tr._entries)):
-                    file, troffset, ignore = tr._entries[i]
-                    with repo.svfs(file, b'a', checkambig=True) as fp:
-                        fp.truncate(troffset)
-                    if troffset == 0:
-                        repo.store.markremoved(file)
+                after = tr._map.difference(before)
+                if after:
+                    tr._file.seek(0)
+                    for l in tr._file:
+                        file, troffset = l.split(b'\0')
+                        if file not in after:
+                            continue
+                        troffset = int(troffset)
+                        with repo.svfs(file, b'a', checkambig=True) as fp:
+                            fp.truncate(troffset)
+                        if troffset == 0:
+                            repo.store.markremoved(file)
 
                 deleteobsmarkers(repo.obsstore, stripobsidx)
                 del repo.obsstore
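
In the repair.py hunk, strip no longer slices tr._entries; it takes the set
difference of the journalled file names before and after the strip group and
re-reads the journal to find the truncation offsets of the files touched
while stripping. The per-file step itself is plain append-only file surgery;
a standalone sketch of it (hypothetical helper, not Mercurial's storage API):

    def truncate_to_journalled_offset(path, offset):
        """Truncate an append-only file back to a recorded offset.

        An offset of 0 means the file did not exist before the transaction,
        so the caller may also mark it as removed (cf. repo.store.markremoved
        in the hunk above).
        """
        with open(path, 'r+b') as fp:
            fp.truncate(offset)
        return offset == 0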