revlog: add option to mmap revlog index
author Mark Thomas <mbthomas@fb.com>
Wed, 13 Sep 2017 17:26:26 +0000
changeset 34296 3c9691728237
parent 34295 3bb2a9f25fe9
child 34297 7f02fb920121
revlog: add option to mmap revlog index Following on from Jun Wu's patch last October[1], we have found that using mmap for the revlog index in repos with large revlogs gives a noticeable performance improvement (~110ms on each hg invocation), particularly for commands that don't touch the index very much. This changeset adds this as an option, activated by a new experimental config option so that it can be enabled on a per-repo basis. The configuration option specifies an index size threshold at which Mercurial will switch to using mmap to access the index. If the configuration option is not specified, the default remains to load the full file, which seems to be the best option for smaller repos. Some initial performance numbers for average of 5 invocations of `hg log -l 5` for different cache states: | Repo: | HG | FB | |---|---|---| | Index size: | 2.3MB | much bigger | | read (warm): | 237ms | 432ms | | mmap (warm): | 227ms | 321ms | | | (-3%) | (-26%) | | read (cold): | 397ms | 696ms | | mmap (cold): | 410ms | 888ms | | | (+3%) | (+28%) | [1] https://www.mercurial-scm.org/pipermail/mercurial-devel/2016-October/088737.html Test Plan: `hg log --config experimental.mmapindexthreshold=4k` Differential Revision: https://phab.mercurial-scm.org/D477
mercurial/changelog.py
mercurial/localrepo.py
mercurial/manifest.py
mercurial/revlog.py
tests/test-revlog-mmapindex.t
--- a/mercurial/changelog.py	Thu Sep 21 05:54:34 2017 -0700
+++ b/mercurial/changelog.py	Wed Sep 13 17:26:26 2017 +0000
@@ -277,7 +277,7 @@
 
         datafile = '00changelog.d'
         revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
-                               checkambig=True)
+                               checkambig=True, mmaplargeindex=True)
 
         if self._initempty:
             # changelogs don't benefit from generaldelta
--- a/mercurial/localrepo.py	Thu Sep 21 05:54:34 2017 -0700
+++ b/mercurial/localrepo.py	Wed Sep 13 17:26:26 2017 +0000
@@ -604,6 +604,10 @@
         chainspan = self.ui.configbytes('experimental', 'maxdeltachainspan', -1)
         if 0 <= chainspan:
             self.svfs.options['maxdeltachainspan'] = chainspan
+        mmapindexthreshold = self.ui.configbytes('experimental',
+                                                 'mmapindexthreshold', None)
+        if mmapindexthreshold is not None:
+            self.svfs.options['mmapindexthreshold'] = mmapindexthreshold
 
         for r in self.requirements:
             if r.startswith('exp-compression-'):
--- a/mercurial/manifest.py	Thu Sep 21 05:54:34 2017 -0700
+++ b/mercurial/manifest.py	Wed Sep 13 17:26:26 2017 +0000
@@ -1231,7 +1231,8 @@
 
         super(manifestrevlog, self).__init__(opener, indexfile,
                                              # only root indexfile is cached
-                                             checkambig=not bool(dir))
+                                             checkambig=not bool(dir),
+                                             mmaplargeindex=True)
 
     @property
     def fulltextcache(self):
--- a/mercurial/revlog.py	Thu Sep 21 05:54:34 2017 -0700
+++ b/mercurial/revlog.py	Wed Sep 13 17:26:26 2017 +0000
@@ -268,8 +268,13 @@
 
     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.
+
+    If mmaplargeindex is True, and an mmapindexthreshold is set, the
+    index will be mmapped rather than read if it is larger than the
+    configured threshold.
     """
-    def __init__(self, opener, indexfile, datafile=None, checkambig=False):
+    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
+                 mmaplargeindex=False):
         """
         create a revlog object
 
@@ -301,6 +306,7 @@
         self._compengine = 'zlib'
         self._maxdeltachainspan = -1
 
+        mmapindexthreshold = None
         v = REVLOG_DEFAULT_VERSION
         opts = getattr(opener, 'options', None)
         if opts is not None:
@@ -323,6 +329,8 @@
                 self._compengine = opts['compengine']
             if 'maxdeltachainspan' in opts:
                 self._maxdeltachainspan = opts['maxdeltachainspan']
+            if mmaplargeindex and 'mmapindexthreshold' in opts:
+                mmapindexthreshold = opts['mmapindexthreshold']
 
         if self._chunkcachesize <= 0:
             raise RevlogError(_('revlog chunk cache size %r is not greater '
@@ -335,7 +343,11 @@
         self._initempty = True
         try:
             f = self.opener(self.indexfile)
-            indexdata = f.read()
+            if (mmapindexthreshold is not None and
+                    self.opener.fstat(f).st_size >= mmapindexthreshold):
+                indexdata = util.buffer(util.mmapread(f))
+            else:
+                indexdata = f.read()
             f.close()
             if len(indexdata) > 0:
                 v = versionformat_unpack(indexdata[:4])[0]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-revlog-mmapindex.t	Wed Sep 13 17:26:26 2017 +0000
@@ -0,0 +1,54 @@
+create verbosemmap.py
+  $ cat << EOF > verbosemmap.py
+  > # extension to make util.mmapread verbose
+  > 
+  > from __future__ import absolute_import
+  > 
+  > from mercurial import (
+  >     extensions,
+  >     util,
+  > )
+  > 
+  > def mmapread(orig, fp):
+  >     print "mmapping %s" % fp.name
+  >     return orig(fp)
+  > 
+  > def extsetup(ui):
+  >     extensions.wrapfunction(util, 'mmapread', mmapread)
+  > EOF
+
+setting up base repo
+  $ hg init a
+  $ cd a
+  $ touch a
+  $ hg add a
+  $ hg commit -qm base
+  $ for i in `$TESTDIR/seq.py 1 100` ; do
+  > echo $i > a
+  > hg commit -qm $i
+  > done
+
+set up verbosemmap extension
+  $ cat << EOF >> $HGRCPATH
+  > [extensions]
+  > verbosemmap=$TESTTMP/verbosemmap.py
+  > EOF
+
+mmap index which is now more than 4k long
+  $ hg log -l 5 -T '{rev}\n' --config experimental.mmapindexthreshold=4k
+  mmapping $TESTTMP/a/.hg/store/00changelog.i (glob)
+  100
+  99
+  98
+  97
+  96
+
+do not mmap index which is still less than 32k
+  $ hg log -l 5 -T '{rev}\n' --config experimental.mmapindexthreshold=32k
+  100
+  99
+  98
+  97
+  96
+
+  $ cd ..