Patchwork D10879: revlog: use file read caching for sidedata

login
register
mail settings
Submitter phabricator
Date June 15, 2021, 11:31 a.m.
Message ID <differential-rev-PHID-DREV-fpgtwyglbaw5atnvj777-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/49200/
State Superseded
Headers show

Comments

phabricator - June 15, 2021, 11:31 a.m.
SimonSapin created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  The previous changeset extracted the file read caching logic from the revlog
  class into a new class (randomaccessfile). Adding a new instance of that
  class for the sidedata file allows sidedata reads to use the same logic.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10879

AFFECTED FILES
  mercurial/changelog.py
  mercurial/revlog.py

CHANGE DETAILS




To: SimonSapin, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -629,6 +629,11 @@ 
             self._chunkcachesize,
             chunkcache,
         )
+        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
+            self.opener,
+            self._sidedatafile,
+            self._chunkcachesize,
+        )
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = util.lrucachedict(500)
         # revlog header -> revlog compressor
@@ -782,6 +787,7 @@ 
         self._revisioncache = None
         self._chainbasecache.clear()
         self._segmentfile.clear_cache()
+        self._segmentfile_sidedata.clear_cache()
         self._pcache = {}
         self._nodemap_docket = None
         self.index.clearcaches()
@@ -1916,31 +1922,17 @@ 
         if sidedata_size == 0:
             return {}
 
-        # XXX this need caching, as we do for data
-        with self._sidedatareadfp() as sdf:
-            if self._docket.sidedata_end < sidedata_offset + sidedata_size:
-                filename = self._sidedatafile
-                end = self._docket.sidedata_end
-                offset = sidedata_offset
-                length = sidedata_size
-                m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
-                raise error.RevlogError(m)
-
-            sdf.seek(sidedata_offset, os.SEEK_SET)
-            comp_segment = sdf.read(sidedata_size)
-
-            if len(comp_segment) < sidedata_size:
-                filename = self._sidedatafile
-                length = sidedata_size
-                offset = sidedata_offset
-                got = len(comp_segment)
-                m = randomaccessfile.PARTIAL_READ_MSG % (
-                    filename,
-                    length,
-                    offset,
-                    got,
-                )
-                raise error.RevlogError(m)
+        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
+            filename = self._sidedatafile
+            end = self._docket.sidedata_end
+            offset = sidedata_offset
+            length = sidedata_size
+            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
+            raise error.RevlogError(m)
+
+        comp_segment = self._segmentfile_sidedata.read_chunk(
+            sidedata_offset, sidedata_size
+        )
 
         comp = self.index[rev][11]
         if comp == COMP_MODE_PLAIN:
@@ -2033,6 +2025,9 @@ 
             # its usage.
             self._writinghandles = None
             self._segmentfile.writing_handle = None
+            # No need to deal with the sidedata writing handle here: it
+            # is only relevant with revlog-v2, which is never inline and
+            # so never reaches this code.
 
         new_dfh = self._datafp(b'w+')
         new_dfh.truncate(0)  # drop any potentially existing data
@@ -2080,6 +2075,9 @@ 
                 self._writinghandles = (ifh, new_dfh, None)
                 self._segmentfile.writing_handle = new_dfh
                 new_dfh = None
+                # No need to deal with the sidedata writing handle here: it
+                # is only relevant with revlog-v2, which is never inline and
+                # so never reaches this code.
         finally:
             if new_dfh is not None:
                 new_dfh.close()
@@ -2138,12 +2136,14 @@ 
                 # exposing all file handle for writing.
                 self._writinghandles = (ifh, dfh, sdfh)
                 self._segmentfile.writing_handle = ifh if self._inline else dfh
+                self._segmentfile_sidedata.writing_handle = sdfh
                 yield
                 if self._docket is not None:
                     self._write_docket(transaction)
             finally:
                 self._writinghandles = None
                 self._segmentfile.writing_handle = None
+                self._segmentfile_sidedata.writing_handle = None
                 if dfh is not None:
                     dfh.close()
                 if sdfh is not None:
@@ -2778,6 +2778,7 @@ 
         self._revisioncache = None
         self._chaininfocache = util.lrucachedict(500)
         self._segmentfile.clear_cache()
+        self._segmentfile_sidedata.clear_cache()
 
         del self.index[rev:-1]
 
diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -455,6 +455,7 @@ 
                     self._realopener, self._indexfile, self._delaybuf
                 )
             self._segmentfile.opener = self.opener
+            self._segmentfile_sidedata.opener = self.opener
         self._delayed = True
         tr.addpending(b'cl-%i' % id(self), self._writepending)
         tr.addfinalize(b'cl-%i' % id(self), self._finalize)
@@ -464,6 +465,7 @@ 
         self._delayed = False
         self.opener = self._realopener
         self._segmentfile.opener = self.opener
+        self._segmentfile_sidedata.opener = self.opener
         # move redirected index data back into place
         if self._docket is not None:
             self._write_docket(tr)
@@ -504,6 +506,7 @@ 
             self._divert = True
             self.opener = _divertopener(self._realopener, self._indexfile)
             self._segmentfile.opener = self.opener
+            self._segmentfile_sidedata.opener = self.opener
 
         if self._divert:
             return True