Patchwork [1,of,3] revlog: improve documentation

login
register
mail settings
Submitter Gregory Szorc
Date Nov. 23, 2015, 3:34 a.m.
Message ID <7783c253a56123557de4.1448249648@ubuntu-main>
Download mbox | patch
Permalink /patch/11588/
State Accepted
Delegated to: Pierre-Yves David
Headers show

Comments

Gregory Szorc - Nov. 23, 2015, 3:34 a.m.
# HG changeset patch
# User Gregory Szorc <gregory.szorc@gmail.com>
# Date 1448238200 28800
#      Sun Nov 22 16:23:20 2015 -0800
# Node ID 7783c253a56123557de44802f7ec235e7f33d85c
# Parent  138cc82144ee0335335533dcfc33968987aa3dc8
revlog: improve documentation

There are a lot of functions and variables doing similar things.
Document the role and functionality of each to make it easier to
grok.
Pierre-Yves David - Nov. 23, 2015, 4:44 a.m.
On 11/22/2015 07:34 PM, Gregory Szorc wrote:
> # HG changeset patch
> # User Gregory Szorc <gregory.szorc@gmail.com>
> # Date 1448238200 28800
> #      Sun Nov 22 16:23:20 2015 -0800
> # Node ID 7783c253a56123557de44802f7ec235e7f33d85c
> # Parent  138cc82144ee0335335533dcfc33968987aa3dc8
> revlog: improve documentation

That one is in the clowncopter. Waiting on more details on the intent of
the other two before moving forward with them.

Patch

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -200,24 +200,31 @@  class revlog(object):
         create a revlog object
 
         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = indexfile[:-2] + ".d"
         self.opener = opener
+        # 3-tuple of (node, rev, text) for a raw revision.
         self._cache = None
+        # 2-tuple of (rev, baserev) defining the base revision the delta chain
+        # begins at for a revision.
         self._basecache = None
+        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
         self._chunkcache = (0, '')
+        # How much data to read and cache into the raw revlog data cache.
         self._chunkcachesize = 65536
         self._maxchainlen = None
         self._aggressivemergedeltas = False
         self.index = []
+        # Mapping of partial identifiers to full nodes.
         self._pcache = {}
+        # Mapping of full node to revision integer.
         self._nodecache = {nullid: nullrev}
         self._nodepos = None
 
         v = REVLOG_DEFAULT_VERSION
         opts = getattr(opener, 'options', None)
         if opts is not None:
             if 'revlogv1' in opts:
                 if 'generaldelta' in opts:
@@ -921,31 +928,37 @@  class revlog(object):
         """compare text with a given file revision
 
         returns True if text is different than what is stored.
         """
         p1, p2 = self.parents(node)
         return hash(text, p1, p2) != node
 
     def _addchunk(self, offset, data):
+        """Add a segment to the revlog cache.
+
+        Accepts an absolute offset and the data that is at that location.
+        """
         o, d = self._chunkcache
         # try to add to existing cache
         if o + len(d) == offset and len(d) + len(data) < _chunksize:
             self._chunkcache = o, d + data
         else:
             self._chunkcache = offset, data
 
     def _loadchunk(self, offset, length, df=None):
-        """Load a chunk/segment from the revlog.
+        """Load a segment of raw data from the revlog.
 
-        Accepts absolute offset, length to read, and an optional existing
+        Accepts an absolute offset, length to read, and an optional existing
         file handle to read from.
 
         If an existing file handle is passed, it will be seeked and the
         original seek position will NOT be restored.
+
+        Returns a str or buffer of raw byte data.
         """
         if df is not None:
             closehandle = False
         else:
             if self._inline:
                 df = self.opener(self.indexfile)
             else:
                 df = self.opener(self.datafile)
@@ -963,45 +976,84 @@  class revlog(object):
         if closehandle:
             df.close()
         self._addchunk(realoffset, d)
         if offset != realoffset or reallength != length:
             return util.buffer(d, offset - realoffset, length)
         return d
 
     def _getchunk(self, offset, length, df=None):
+        """Obtain a segment of raw data from the revlog.
+
+        Accepts an absolute offset, length of bytes to obtain, and an
+        optional file handle to the already-opened revlog. If the file
+        handle is used, its original seek position will not be preserved.
+
+        Requests for data may be returned from a cache.
+
+        Returns a str or a buffer instance of raw byte data.
+        """
         o, d = self._chunkcache
         l = len(d)
 
         # is it in the cache?
         cachestart = offset - o
         cacheend = cachestart + length
         if cachestart >= 0 and cacheend <= l:
             if cachestart == 0 and cacheend == l:
                 return d # avoid a copy
             return util.buffer(d, cachestart, cacheend - cachestart)
 
         return self._loadchunk(offset, length, df=df)
 
     def _chunkraw(self, startrev, endrev, df=None):
+        """Obtain a segment of raw data corresponding to a range of revisions.
+
+        Accepts the start and end revisions and an optional already-open
+        file handle to be used for reading. If the file handle is read, its
+        seek position will not be preserved.
+
+        Requests for data may be satisfied by a cache.
+
+        Returns a str or a buffer instance of raw byte data. Callers will
+        need to call ``self.start(rev)`` and ``self.length(rev)`` to determine
+        where each revision's data begins and ends.
+        """
         start = self.start(startrev)
         end = self.end(endrev)
         if self._inline:
             start += (startrev + 1) * self._io.size
             end += (endrev + 1) * self._io.size
         length = end - start
         return self._getchunk(start, length, df=df)
 
     def _chunk(self, rev, df=None):
+        """Obtain a single decompressed chunk for a revision.
+
+        Accepts an integer revision and an optional already-open file handle
+        to be used for reading. If used, the seek position of the file will not
+        be preserved.
+
+        Returns a str holding uncompressed data for the requested revision.
+        """
         return decompress(self._chunkraw(rev, rev, df=df))
 
     def _chunks(self, revs, df=None):
-        '''faster version of [self._chunk(rev) for rev in revs]
+        """Obtain decompressed chunks for the specified revisions.
 
-        Assumes that revs is in ascending order.'''
+        Accepts an iterable of numeric revisions that are assumed to be in
+        ascending order. Also accepts an optional already-open file handle
+        to be used for reading. If used, the seek position of the file will
+        not be preserved.
+
+        This function is similar to calling ``self._chunk()`` multiple times,
+        but is faster.
+
+        Returns a list with decompressed data for each requested revision.
+        """
         if not revs:
             return []
         start = self.start
         length = self.length
         inline = self._inline
         iosize = self._io.size
         buffer = util.buffer
 
@@ -1027,16 +1079,17 @@  class revlog(object):
             if inline:
                 chunkstart += (rev + 1) * iosize
             chunklength = length(rev)
             ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
 
         return l
 
     def _chunkclear(self):
+        """Clear the raw chunk cache."""
         self._chunkcache = (0, '')
 
     def deltaparent(self, rev):
         """return deltaparent of the given revision"""
         base = self.index[rev][3]
         if base == rev:
             return nullrev
         elif self._generaldelta: