Patchwork [1,of,4,flagprocessor,v8] revlog: add 'raw' argument to revision and _addrevision

login
register
mail settings
Submitter Remi Chaintron
Date Jan. 5, 2017, 5:42 p.m.
Message ID <8c12adc05b5b1c564eb0.1483638121@remi-mbp2>
Download mbox | patch
Permalink /patch/18106/
State Superseded
Headers show

Comments

Remi Chaintron - Jan. 5, 2017, 5:42 p.m.
# HG changeset patch
# User Remi Chaintron <remi@fb.com>
# Date 1483636567 0
#      Thu Jan 05 17:16:07 2017 +0000
# Node ID 8c12adc05b5b1c564eb068d5ad893b33aadd4f76
# Parent  8385310370d0c472373ca9eb8aec59d50218d7e1
revlog: add 'raw' argument to revision and _addrevision

This patch introduces a new 'raw' argument (defaults to False) to revlog's
revision() and _addrevision() methods.
When the 'raw' argument is set to True, it indicates the revision data should be
handled as raw data by the flagprocessor.

Note: Given revlog.addgroup() calls are restricted to changegroup generation, we
can always set raw to True when calling revlog._addrevision() from
revlog.addgroup().
Pierre-Yves David - Jan. 9, 2017, 3:25 p.m.
On 01/05/2017 06:42 PM, Remi Chaintron wrote:
> # HG changeset patch
> # User Remi Chaintron <remi@fb.com>
> # Date 1483636567 0
> #      Thu Jan 05 17:16:07 2017 +0000
> # Node ID 8c12adc05b5b1c564eb068d5ad893b33aadd4f76
> # Parent  8385310370d0c472373ca9eb8aec59d50218d7e1
> revlog: add 'raw' argument to revision and _addrevision

That one looks good, with a very small fix needed in a docstring.

I'm not super happy with the patch order, as that docstring references
content from two patches later, but that's minor and I don't have a
better slicing idea, so let's move forward. (That's also why I'm not taking
patches 1-2 without 3.)

>[…]
> diff --git a/mercurial/revlog.py b/mercurial/revlog.py
> --- a/mercurial/revlog.py
> +++ b/mercurial/revlog.py
> […]
> @@ -1412,13 +1414,16 @@
>          return True
>
>      def _addrevision(self, node, text, transaction, link, p1, p2, flags,
> -                     cachedelta, ifh, dfh, alwayscache=False):
> +                     cachedelta, ifh, dfh, alwayscache=False, raw=False):
>          """internal function to add revisions to the log
>
>          see addrevision for argument descriptions.
>          invariants:
>          - text is optional (can be None); if not set, cachedelta must be set.
>            if both are set, they must correspond to each other.
> +        - raw is optional; if set to True, it indicates the revision data is to
> +          be treated by processflags() as raw. It is usually set by changegroup
> +          generation and debug commands.

s/processflags/_processflags/

That would avoid needing a small hunk in patch 3 to fix it.

>          """
>          btext = [text]
>          def buildtext():
> @@ -1438,8 +1443,9 @@
>                      fh = ifh
>                  else:
>                      fh = dfh
> -                basetext = self.revision(self.node(baserev), _df=fh)
> +                basetext = self.revision(self.node(baserev), _df=fh, raw=raw)
>                  btext[0] = mdiff.patch(basetext, delta)
> +
>              try:
>                  self.checkhash(btext[0], node, p1=p1, p2=p2)
>                  if flags & REVIDX_ISCENSORED:
> @@ -1668,10 +1674,14 @@
>                  # the added revision, which will require a call to
>                  # revision(). revision() will fast path if there is a cache
>                  # hit. So, we tell _addrevision() to always cache in this case.
> +                # We're only using addgroup() in the context of changegroup
> +                # generation so the revision data can always be handled as raw
> +                # by the flagprocessor.
>                  chain = self._addrevision(node, None, transaction, link,
>                                            p1, p2, flags, (baserev, delta),
>                                            ifh, dfh,
> -                                          alwayscache=bool(addrevisioncb))
> +                                          alwayscache=bool(addrevisioncb),
> +                                          raw=True)
>
>                  if addrevisioncb:
>                      addrevisioncb(self, chain)
>[…]

Patch

diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py
--- a/mercurial/bundlerepo.py
+++ b/mercurial/bundlerepo.py
@@ -117,7 +117,7 @@ 
         return mdiff.textdiff(self.revision(self.node(rev1)),
                               self.revision(self.node(rev2)))
 
-    def revision(self, nodeorrev):
+    def revision(self, nodeorrev, raw=False):
         """return an uncompressed revision of a given node or revision
         number.
         """
diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -783,7 +783,7 @@ 
         prefix = ''
         if revlog.iscensored(base) or revlog.iscensored(rev):
             try:
-                delta = revlog.revision(node)
+                delta = revlog.revision(node, raw=True)
             except error.CensoredNodeError as e:
                 delta = e.tombstone
             if base == nullrev:
@@ -792,7 +792,7 @@ 
                 baselen = revlog.rawsize(base)
                 prefix = mdiff.replacediffheader(baselen, len(delta))
         elif base == nullrev:
-            delta = revlog.revision(node)
+            delta = revlog.revision(node, raw=True)
             prefix = mdiff.trivialdiffheader(len(delta))
         else:
             delta = revlog.revdiff(base, rev)
diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -1110,6 +1110,9 @@ 
         return filectx(self._repo, self._path, fileid=fileid,
                        filelog=self._filelog, changeid=changeid)
 
+    def rawdata(self):
+        return self._filelog.revision(self._filenode, raw=True)
+
     def data(self):
         try:
             return self._filelog.read(self._filenode)
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -445,7 +445,7 @@ 
         raise error.CommandError('debugdata', _('invalid arguments'))
     r = cmdutil.openrevlog(repo, 'debugdata', file_, opts)
     try:
-        ui.write(r.revision(r.lookup(rev)))
+        ui.write(r.revision(r.lookup(rev), raw=True))
     except KeyError:
         raise error.Abort(_('invalid revision identifier %s') % rev)
 
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1202,12 +1202,14 @@ 
         return mdiff.textdiff(self.revision(rev1),
                               self.revision(rev2))
 
-    def revision(self, nodeorrev, _df=None):
+    def revision(self, nodeorrev, _df=None, raw=False):
         """return an uncompressed revision of a given node or revision
         number.
 
-        _df is an existing file handle to read from. It is meant to only be
-        used internally.
+        _df - an existing file handle to read from. (internal-only)
+        raw - an optional argument specifying if the revision data is to be
+        treated as raw data when applying flag transforms. 'raw' should be set
+        to True when generating changegroups or in debug commands.
         """
         if isinstance(nodeorrev, int):
             rev = nodeorrev
@@ -1412,13 +1414,16 @@ 
         return True
 
     def _addrevision(self, node, text, transaction, link, p1, p2, flags,
-                     cachedelta, ifh, dfh, alwayscache=False):
+                     cachedelta, ifh, dfh, alwayscache=False, raw=False):
         """internal function to add revisions to the log
 
         see addrevision for argument descriptions.
         invariants:
         - text is optional (can be None); if not set, cachedelta must be set.
           if both are set, they must correspond to each other.
+        - raw is optional; if set to True, it indicates the revision data is to
+          be treated by processflags() as raw. It is usually set by changegroup
+          generation and debug commands.
         """
         btext = [text]
         def buildtext():
@@ -1438,8 +1443,9 @@ 
                     fh = ifh
                 else:
                     fh = dfh
-                basetext = self.revision(self.node(baserev), _df=fh)
+                basetext = self.revision(self.node(baserev), _df=fh, raw=raw)
                 btext[0] = mdiff.patch(basetext, delta)
+
             try:
                 self.checkhash(btext[0], node, p1=p1, p2=p2)
                 if flags & REVIDX_ISCENSORED:
@@ -1668,10 +1674,14 @@ 
                 # the added revision, which will require a call to
                 # revision(). revision() will fast path if there is a cache
                 # hit. So, we tell _addrevision() to always cache in this case.
+                # We're only using addgroup() in the context of changegroup
+                # generation so the revision data can always be handled as raw
+                # by the flagprocessor.
                 chain = self._addrevision(node, None, transaction, link,
                                           p1, p2, flags, (baserev, delta),
                                           ifh, dfh,
-                                          alwayscache=bool(addrevisioncb))
+                                          alwayscache=bool(addrevisioncb),
+                                          raw=True)
 
                 if addrevisioncb:
                     addrevisioncb(self, chain)
diff --git a/mercurial/unionrepo.py b/mercurial/unionrepo.py
--- a/mercurial/unionrepo.py
+++ b/mercurial/unionrepo.py
@@ -93,7 +93,7 @@ 
         return mdiff.textdiff(self.revision(self.node(rev1)),
                               self.revision(self.node(rev2)))
 
-    def revision(self, nodeorrev):
+    def revision(self, nodeorrev, raw=False):
         """return an uncompressed revision of a given node or revision
         number.
         """