Patchwork [3,of,3] revlog: allow tuning of the chunk cache size (via format.chunkcachesize)

login
register
mail settings
Submitter Brodie Rao
Date Nov. 17, 2013, 7:51 p.m.
Message ID <f30efa9da91bad23f223.1384717905@hit-nxdomain.opendns.com>
Download mbox | patch
Permalink /patch/3028/
State Superseded
Headers show

Comments

Brodie Rao - Nov. 17, 2013, 7:51 p.m.
# HG changeset patch
# User Brodie Rao <brodie@sf.io>
# Date 1384717822 18000
#      Sun Nov 17 14:50:22 2013 -0500
# Node ID f30efa9da91bad23f223a58428ba1d89802270f3
# Parent  5d08fc0f4d14e41653927ce729eef0303c8f1786
revlog: allow tuning of the chunk cache size (via format.chunkcachesize)

Running perfmoonwalk on the Mercurial repo (with almost 20,000 changesets) on
Mac OS X with an SSD, with this change applied and various cache sizes:

$ hg --config format.chunkcachesize=1024 perfmoonwalk
! wall 2.022021 comb 2.030000 user 1.970000 sys 0.060000 (best of 5)

(16,154 cache hits, 3,840 misses.)

$ hg --config format.chunkcachesize=4096 perfmoonwalk
! wall 1.901006 comb 1.900000 user 1.880000 sys 0.020000 (best of 6)

(19,003 hits, 991 misses.)

$ hg --config format.chunkcachesize=16384 perfmoonwalk
! wall 1.802775 comb 1.800000 user 1.800000 sys 0.000000 (best of 6)

(19,746 hits, 248 misses.)

$ hg --config format.chunkcachesize=32768 perfmoonwalk
! wall 1.818545 comb 1.810000 user 1.810000 sys 0.000000 (best of 6)

(19,870 hits, 124 misses.)

$ hg --config format.chunkcachesize=65536 perfmoonwalk
! wall 1.801350 comb 1.810000 user 1.800000 sys 0.010000 (best of 6)

(19,932 hits, 62 misses.)

$ hg --config format.chunkcachesize=131072 perfmoonwalk
! wall 1.805879 comb 1.820000 user 1.810000 sys 0.010000 (best of 6)

(19,963 hits, 31 misses.)

We may want to change the default size in the future based on testing and
user feedback.
Augie Fackler - Nov. 17, 2013, 10:10 p.m.
On Sun, Nov 17, 2013 at 02:51:45PM -0500, Brodie Rao wrote:
> # HG changeset patch
> # User Brodie Rao <brodie@sf.io>
> # Date 1384717822 18000
> #      Sun Nov 17 14:50:22 2013 -0500
> # Node ID f30efa9da91bad23f223a58428ba1d89802270f3
> # Parent  5d08fc0f4d14e41653927ce729eef0303c8f1786
> revlog: allow tuning of the chunk cache size (via format.chunkcachesize)

Series looks superficially reasonable; someone else should also review.

>
> Running perfmoonwalk on the Mercurial repo (with almost 20,000 changesets) on
> Mac OS X with an SSD, before this change:
>
> $ hg --config format.chunkcachesize=1024 perfmoonwalk
> ! wall 2.022021 comb 2.030000 user 1.970000 sys 0.060000 (best of 5)
>
> (16,154 cache hits, 3,840 misses.)
>
> $ hg --config format.chunkcachesize=4096 perfmoonwalk
> ! wall 1.901006 comb 1.900000 user 1.880000 sys 0.020000 (best of 6)
>
> (19,003 hits, 991 misses.)
>
> $ hg --config format.chunkcachesize=16384 perfmoonwalk
> ! wall 1.802775 comb 1.800000 user 1.800000 sys 0.000000 (best of 6)
>
> (19,746 hits, 248 misses.)
>
> $ hg --config format.chunkcachesize=32768 perfmoonwalk
> ! wall 1.818545 comb 1.810000 user 1.810000 sys 0.000000 (best of 6)
>
> (19,870 hits, 124 misses.)
>
> $ hg --config format.chunkcachesize=65536 perfmoonwalk
> ! wall 1.801350 comb 1.810000 user 1.800000 sys 0.010000 (best of 6)
>
> (19,932 hits, 62 misses.)
>
> $ hg --config format.chunkcachesize=131072 perfmoonwalk
> ! wall 1.805879 comb 1.820000 user 1.810000 sys 0.010000 (best of 6)
>
> (19,963 hits, 31 misses.)
>
> We may want to change the default size in the future based on testing and
> user feedback.
>
> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
> --- a/mercurial/localrepo.py
> +++ b/mercurial/localrepo.py
> @@ -279,6 +279,9 @@ class localrepository(object):
>          self.requirements = requirements
>          self.sopener.options = dict((r, 1) for r in requirements
>                                             if r in self.openerreqs)
> +        chunkcachesize = self.ui.configint('format', 'chunkcachesize')
> +        if chunkcachesize is not None:
> +            self.sopener.options['chunkcachesize'] = chunkcachesize
>
>      def _writerequirements(self):
>          reqfile = self.opener("requires", "w")
> diff --git a/mercurial/revlog.py b/mercurial/revlog.py
> --- a/mercurial/revlog.py
> +++ b/mercurial/revlog.py
> @@ -202,6 +202,7 @@ class revlog(object):
>          self._cache = None
>          self._basecache = None
>          self._chunkcache = (0, '')
> +        self._chunkcachesize = 65536
>          self.index = []
>          self._pcache = {}
>          self._nodecache = {nullid: nullrev}
> @@ -215,6 +216,15 @@ class revlog(object):
>                      v |= REVLOGGENERALDELTA
>              else:
>                  v = 0
> +            if 'chunkcachesize' in opts:
> +                self._chunkcachesize = opts['chunkcachesize']
> +
> +        if self._chunkcachesize <= 0:
> +            raise RevlogError(_('revlog chunk cache size %r is not greater '
> +                                'than 0') % self._chunkcachesize)
> +        elif self._chunkcachesize & (self._chunkcachesize - 1):
> +            raise RevlogError(_('revlog chunk cache size %r is not a power '
> +                                'of 2') % self._chunkcachesize)
>
>          i = ''
>          self._initempty = True
> @@ -820,8 +830,10 @@ class revlog(object):
>          else:
>              df = self.opener(self.datafile)
>
> -        realoffset = offset & ~65535
> -        reallength = ((offset + length + 65536) & ~65535) - realoffset
> +        cachesize = self._chunkcachesize
> +        realoffset = offset & ~(cachesize - 1)
> +        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
> +                      - realoffset)
>          df.seek(realoffset)
>          d = df.read(reallength)
>          df.close()
> diff --git a/tests/test-init.t b/tests/test-init.t
> --- a/tests/test-init.t
> +++ b/tests/test-init.t
> @@ -26,6 +26,31 @@ creating 'local'
>    $ hg ci --cwd local -A -m "init"
>    adding foo
>
> +test custom revlog chunk cache sizes
> +
> +  $ hg --config format.chunkcachesize=0 log -R local -pv
> +  abort: revlog chunk cache size 0 is not greater than 0!
> +  [255]
> +  $ hg --config format.chunkcachesize=1023 log -R local -pv
> +  abort: revlog chunk cache size 1023 is not a power of 2!
> +  [255]
> +  $ hg --config format.chunkcachesize=1024 log -R local -pv
> +  changeset:   0:08b9e9f63b32
> +  tag:         tip
> +  user:        test
> +  date:        Thu Jan 01 00:00:00 1970 +0000
> +  files:       foo
> +  description:
> +  init
> +
> +
> +  diff -r 000000000000 -r 08b9e9f63b32 foo
> +  --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
> +  +++ b/foo	Thu Jan 01 00:00:00 1970 +0000
> +  @@ -0,0 +1,1 @@
> +  +this
> +
> +
>  creating repo with format.usestore=false
>
>    $ hg --config format.usestore=false init old
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel

Patch

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -279,6 +279,9 @@  class localrepository(object):
         self.requirements = requirements
         self.sopener.options = dict((r, 1) for r in requirements
                                            if r in self.openerreqs)
+        chunkcachesize = self.ui.configint('format', 'chunkcachesize')
+        if chunkcachesize is not None:
+            self.sopener.options['chunkcachesize'] = chunkcachesize
 
     def _writerequirements(self):
         reqfile = self.opener("requires", "w")
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -202,6 +202,7 @@  class revlog(object):
         self._cache = None
         self._basecache = None
         self._chunkcache = (0, '')
+        self._chunkcachesize = 65536
         self.index = []
         self._pcache = {}
         self._nodecache = {nullid: nullrev}
@@ -215,6 +216,15 @@  class revlog(object):
                     v |= REVLOGGENERALDELTA
             else:
                 v = 0
+            if 'chunkcachesize' in opts:
+                self._chunkcachesize = opts['chunkcachesize']
+
+        if self._chunkcachesize <= 0:
+            raise RevlogError(_('revlog chunk cache size %r is not greater '
+                                'than 0') % self._chunkcachesize)
+        elif self._chunkcachesize & (self._chunkcachesize - 1):
+            raise RevlogError(_('revlog chunk cache size %r is not a power '
+                                'of 2') % self._chunkcachesize)
 
         i = ''
         self._initempty = True
@@ -820,8 +830,10 @@  class revlog(object):
         else:
             df = self.opener(self.datafile)
 
-        realoffset = offset & ~65535
-        reallength = ((offset + length + 65536) & ~65535) - realoffset
+        cachesize = self._chunkcachesize
+        realoffset = offset & ~(cachesize - 1)
+        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
+                      - realoffset)
         df.seek(realoffset)
         d = df.read(reallength)
         df.close()
diff --git a/tests/test-init.t b/tests/test-init.t
--- a/tests/test-init.t
+++ b/tests/test-init.t
@@ -26,6 +26,31 @@  creating 'local'
   $ hg ci --cwd local -A -m "init"
   adding foo
 
+test custom revlog chunk cache sizes
+
+  $ hg --config format.chunkcachesize=0 log -R local -pv
+  abort: revlog chunk cache size 0 is not greater than 0!
+  [255]
+  $ hg --config format.chunkcachesize=1023 log -R local -pv
+  abort: revlog chunk cache size 1023 is not a power of 2!
+  [255]
+  $ hg --config format.chunkcachesize=1024 log -R local -pv
+  changeset:   0:08b9e9f63b32
+  tag:         tip
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  files:       foo
+  description:
+  init
+  
+  
+  diff -r 000000000000 -r 08b9e9f63b32 foo
+  --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+  +++ b/foo	Thu Jan 01 00:00:00 1970 +0000
+  @@ -0,0 +1,1 @@
+  +this
+  
+
 creating repo with format.usestore=false
 
   $ hg --config format.usestore=false init old