Patchwork [2,of,2] commands: print chunk type in debugrevlog

login
register
mail settings
Submitter Gregory Szorc
Date Nov. 18, 2016, 4:36 a.m.
Message ID <4f92246570138fcf4391.1479443819@ubuntu-vm-main>
Download mbox | patch
Permalink /patch/17637/
State Accepted
Headers show

Comments

Gregory Szorc - Nov. 18, 2016, 4:36 a.m.
# HG changeset patch
# User Gregory Szorc <gregory.szorc@gmail.com>
# Date 1479443400 28800
#      Thu Nov 17 20:30:00 2016 -0800
# Node ID 4f92246570138fcf43913a287619706940e33e92
# Parent  3a1a4b0f3fd8445b166608e86829e048770ffa92
commands: print chunk type in debugrevlog

Each data entry ("chunk") in a revlog has a type based on the first
byte of the data. This type indicates how to interpret the data.

This seems like a useful thing to be able to query through a debug
command. So let's add that to `hg debugrevlog`.

This does make `hg debugrevlog` slightly slower, as it has to read
more than just the index. However, even on the mozilla-unified
manifest (which is ~200MB spread over ~350K revisions), this takes
<400ms.
Augie Fackler - Nov. 19, 2016, 1:42 a.m.
On Thu, Nov 17, 2016 at 08:36:59PM -0800, Gregory Szorc wrote:
> # HG changeset patch
> # User Gregory Szorc <gregory.szorc@gmail.com>
> # Date 1479443400 28800
> #      Thu Nov 17 20:30:00 2016 -0800
> # Node ID 4f92246570138fcf43913a287619706940e33e92
> # Parent  3a1a4b0f3fd8445b166608e86829e048770ffa92
> commands: print chunk type in debugrevlog

queued, thanks

>
> Each data entry ("chunk") in a revlog has a type based on the first
> byte of the data. This type indicates how to interpret the data.
>
> This seems like a useful thing to be able to query through a debug
> command. So let's add that to `hg debugrevlog`.
>
> This does make `hg debugrevlog` slightly slower, as it has to read
> more than just the index. However, even on the mozilla-unified
> manifest (which is ~200MB spread over ~350K revisions), this takes
> <400ms.
>
> diff --git a/mercurial/commands.py b/mercurial/commands.py
> --- a/mercurial/commands.py
> +++ b/mercurial/commands.py
> @@ -15,6 +15,7 @@ import random
>  import re
>  import shlex
>  import socket
> +import string
>  import sys
>  import tempfile
>  import time
> @@ -3194,6 +3195,8 @@ def debugrevlog(ui, repo, file_=None, **
>      datasize = [None, 0, 0]
>      fullsize = [None, 0, 0]
>      deltasize = [None, 0, 0]
> +    chunktypecounts = {}
> +    chunktypesizes = {}
>
>      def addsize(size, l):
>          if l[0] is None or size < l[0]:
> @@ -3231,6 +3234,20 @@ def debugrevlog(ui, repo, file_=None, **
>              elif delta != nullrev:
>                  numother += 1
>
> +        # Obtain data on the raw chunks in the revlog.
> +        chunk = r._chunkraw(rev, rev)[1]
> +        if chunk:
> +            chunktype = chunk[0]
> +        else:
> +            chunktype = 'empty'
> +
> +        if chunktype not in chunktypecounts:
> +            chunktypecounts[chunktype] = 0
> +            chunktypesizes[chunktype] = 0
> +
> +        chunktypecounts[chunktype] += 1
> +        chunktypesizes[chunktype] += size
> +
>      # Adjust size min value for empty cases
>      for size in (datasize, fullsize, deltasize):
>          if size[0] is None:
> @@ -3282,6 +3299,24 @@ def debugrevlog(ui, repo, file_=None, **
>      ui.write(('    full      : ') + fmt % pcfmt(fulltotal, totalsize))
>      ui.write(('    deltas    : ') + fmt % pcfmt(deltatotal, totalsize))
>
> +    def fmtchunktype(chunktype):
> +        if chunktype == 'empty':
> +            return '    %s     : ' % chunktype
> +        elif chunktype in string.ascii_letters:
> +            return '    0x%s (%s)  : ' % (hex(chunktype), chunktype)
> +        else:
> +            return '    0x%s      : ' % hex(chunktype)
> +
> +    ui.write('\n')
> +    ui.write(('chunks        : ') + fmt2 % numrevs)
> +    for chunktype in sorted(chunktypecounts):
> +        ui.write(fmtchunktype(chunktype))
> +        ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
> +    ui.write(('chunks size   : ') + fmt2 % totalsize)
> +    for chunktype in sorted(chunktypecounts):
> +        ui.write(fmtchunktype(chunktype))
> +        ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
> +
>      ui.write('\n')
>      fmt = dfmtstr(max(avgchainlen, compratio))
>      ui.write(('avg chain length  : ') + fmt % avgchainlen)
> diff --git a/tests/test-debugcommands.t b/tests/test-debugcommands.t
> --- a/tests/test-debugcommands.t
> +++ b/tests/test-debugcommands.t
> @@ -22,6 +22,11 @@
>        full      : 44 (100.00%)
>        deltas    :  0 ( 0.00%)
>
> +  chunks        :  1
> +      0x75 (u)  :  1 (100.00%)
> +  chunks size   : 44
> +      0x75 (u)  : 44 (100.00%)
> +
>    avg chain length  : 0
>    max chain length  : 0
>    compression ratio : 0
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Patch

diff --git a/mercurial/commands.py b/mercurial/commands.py
--- a/mercurial/commands.py
+++ b/mercurial/commands.py
@@ -15,6 +15,7 @@  import random
 import re
 import shlex
 import socket
+import string
 import sys
 import tempfile
 import time
@@ -3194,6 +3195,8 @@  def debugrevlog(ui, repo, file_=None, **
     datasize = [None, 0, 0]
     fullsize = [None, 0, 0]
     deltasize = [None, 0, 0]
+    chunktypecounts = {}
+    chunktypesizes = {}
 
     def addsize(size, l):
         if l[0] is None or size < l[0]:
@@ -3231,6 +3234,20 @@  def debugrevlog(ui, repo, file_=None, **
             elif delta != nullrev:
                 numother += 1
 
+        # Obtain data on the raw chunks in the revlog.
+        chunk = r._chunkraw(rev, rev)[1]
+        if chunk:
+            chunktype = chunk[0]
+        else:
+            chunktype = 'empty'
+
+        if chunktype not in chunktypecounts:
+            chunktypecounts[chunktype] = 0
+            chunktypesizes[chunktype] = 0
+
+        chunktypecounts[chunktype] += 1
+        chunktypesizes[chunktype] += size
+
     # Adjust size min value for empty cases
     for size in (datasize, fullsize, deltasize):
         if size[0] is None:
@@ -3282,6 +3299,24 @@  def debugrevlog(ui, repo, file_=None, **
     ui.write(('    full      : ') + fmt % pcfmt(fulltotal, totalsize))
     ui.write(('    deltas    : ') + fmt % pcfmt(deltatotal, totalsize))
 
+    def fmtchunktype(chunktype):
+        if chunktype == 'empty':
+            return '    %s     : ' % chunktype
+        elif chunktype in string.ascii_letters:
+            return '    0x%s (%s)  : ' % (hex(chunktype), chunktype)
+        else:
+            return '    0x%s      : ' % hex(chunktype)
+
+    ui.write('\n')
+    ui.write(('chunks        : ') + fmt2 % numrevs)
+    for chunktype in sorted(chunktypecounts):
+        ui.write(fmtchunktype(chunktype))
+        ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
+    ui.write(('chunks size   : ') + fmt2 % totalsize)
+    for chunktype in sorted(chunktypecounts):
+        ui.write(fmtchunktype(chunktype))
+        ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
+
     ui.write('\n')
     fmt = dfmtstr(max(avgchainlen, compratio))
     ui.write(('avg chain length  : ') + fmt % avgchainlen)
diff --git a/tests/test-debugcommands.t b/tests/test-debugcommands.t
--- a/tests/test-debugcommands.t
+++ b/tests/test-debugcommands.t
@@ -22,6 +22,11 @@ 
       full      : 44 (100.00%)
       deltas    :  0 ( 0.00%)
   
+  chunks        :  1
+      0x75 (u)  :  1 (100.00%)
+  chunks size   : 44
+      0x75 (u)  : 44 (100.00%)
+  
   avg chain length  : 0
   max chain length  : 0
   compression ratio : 0