revlog: allow tuning of the chunk cache size (via format.chunkcachesize)
author Brodie Rao <brodie@sf.io>
Sun, 17 Nov 2013 18:04:29 -0500
changeset 20180 969148b49fc6
parent 20179 5bb3826bdac4
child 20181 b9515fb9e72a
revlog: allow tuning of the chunk cache size (via format.chunkcachesize)

Running perfmoonwalk on the Mercurial repo (with almost 20,000 changesets) on
Mac OS X with an SSD, before this change:

$ hg --config format.chunkcachesize=1024 perfmoonwalk
! wall 2.022021 comb 2.030000 user 1.970000 sys 0.060000 (best of 5)
(16,154 cache hits, 3,840 misses.)

$ hg --config format.chunkcachesize=4096 perfmoonwalk
! wall 1.901006 comb 1.900000 user 1.880000 sys 0.020000 (best of 6)
(19,003 hits, 991 misses.)

$ hg --config format.chunkcachesize=16384 perfmoonwalk
! wall 1.802775 comb 1.800000 user 1.800000 sys 0.000000 (best of 6)
(19,746 hits, 248 misses.)

$ hg --config format.chunkcachesize=32768 perfmoonwalk
! wall 1.818545 comb 1.810000 user 1.810000 sys 0.000000 (best of 6)
(19,870 hits, 124 misses.)

$ hg --config format.chunkcachesize=65536 perfmoonwalk
! wall 1.801350 comb 1.810000 user 1.800000 sys 0.010000 (best of 6)
(19,932 hits, 62 misses.)

$ hg --config format.chunkcachesize=131072 perfmoonwalk
! wall 1.805879 comb 1.820000 user 1.810000 sys 0.010000 (best of 6)
(19,963 hits, 31 misses.)

We may want to change the default size in the future based on testing and user
feedback.
mercurial/localrepo.py
mercurial/revlog.py
tests/test-init.t
--- a/mercurial/localrepo.py	Sun Nov 17 18:04:28 2013 -0500
+++ b/mercurial/localrepo.py	Sun Nov 17 18:04:29 2013 -0500
@@ -281,6 +281,9 @@
         self.requirements = requirements
         self.sopener.options = dict((r, 1) for r in requirements
                                            if r in self.openerreqs)
+        chunkcachesize = self.ui.configint('format', 'chunkcachesize')
+        if chunkcachesize is not None:
+            self.sopener.options['chunkcachesize'] = chunkcachesize
 
     def _writerequirements(self):
         reqfile = self.opener("requires", "w")
--- a/mercurial/revlog.py	Sun Nov 17 18:04:28 2013 -0500
+++ b/mercurial/revlog.py	Sun Nov 17 18:04:29 2013 -0500
@@ -202,6 +202,7 @@
         self._cache = None
         self._basecache = None
         self._chunkcache = (0, '')
+        self._chunkcachesize = 65536
         self.index = []
         self._pcache = {}
         self._nodecache = {nullid: nullrev}
@@ -215,6 +216,15 @@
                     v |= REVLOGGENERALDELTA
             else:
                 v = 0
+            if 'chunkcachesize' in opts:
+                self._chunkcachesize = opts['chunkcachesize']
+
+        if self._chunkcachesize <= 0:
+            raise RevlogError(_('revlog chunk cache size %r is not greater '
+                                'than 0') % self._chunkcachesize)
+        elif self._chunkcachesize & (self._chunkcachesize - 1):
+            raise RevlogError(_('revlog chunk cache size %r is not a power '
+                                'of 2') % self._chunkcachesize)
 
         i = ''
         self._initempty = True
@@ -845,8 +855,10 @@
         # Cache data both forward and backward around the requested
         # data, in a fixed size window. This helps speed up operations
         # involving reading the revlog backwards.
-        realoffset = offset & ~65535
-        reallength = ((offset + length + 65536) & ~65535) - realoffset
+        cachesize = self._chunkcachesize
+        realoffset = offset & ~(cachesize - 1)
+        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
+                      - realoffset)
         df.seek(realoffset)
         d = df.read(reallength)
         df.close()
--- a/tests/test-init.t	Sun Nov 17 18:04:28 2013 -0500
+++ b/tests/test-init.t	Sun Nov 17 18:04:29 2013 -0500
@@ -26,6 +26,31 @@
   $ hg ci --cwd local -A -m "init"
   adding foo
 
+test custom revlog chunk cache sizes
+
+  $ hg --config format.chunkcachesize=0 log -R local -pv
+  abort: revlog chunk cache size 0 is not greater than 0!
+  [255]
+  $ hg --config format.chunkcachesize=1023 log -R local -pv
+  abort: revlog chunk cache size 1023 is not a power of 2!
+  [255]
+  $ hg --config format.chunkcachesize=1024 log -R local -pv
+  changeset:   0:08b9e9f63b32
+  tag:         tip
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  files:       foo
+  description:
+  init
+  
+  
+  diff -r 000000000000 -r 08b9e9f63b32 foo
+  --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+  +++ b/foo	Thu Jan 01 00:00:00 1970 +0000
+  @@ -0,0 +1,1 @@
+  +this
+  
+
 creating repo with format.usestore=false
 
   $ hg --config format.usestore=false init old