branchmap: pass changelog into branchmap functions
author Gregory Szorc <gregory.szorc@gmail.com>
Fri, 19 Oct 2018 16:16:17 +0200
changeset 40374 47c03042cd1d
parent 40373 c3ab0a89331d
child 40375 76d4272bd57b
branchmap: pass changelog into branchmap functions

As part of building the branchmap, we loop over revs and call branchinfo() or _branchinfo(). Previously, these functions were accessing repo.changelog. We know from past experience that accessing repo.changelog in loops is bad for performance.

This commit teaches the branchmap code to pass a changelog instance into branchinfo() and _branchinfo() so we don't need to pay this penalty.

On my MBP, this appears to show a speedup on a clone of the mozilla-unified repo:

$ hg perfbranchmap --clear-revbranch
! base
! wall 21.078160 comb 21.070000 user 20.920000 sys 0.150000 (best of 3)
! wall 20.574682 comb 20.560000 user 20.400000 sys 0.160000 (best of 3)

$ hg perfbranchmap
! base
! wall 4.880413 comb 4.870000 user 4.860000 sys 0.010000 (best of 3)
! wall 4.573968 comb 4.560000 user 4.550000 sys 0.010000 (best of 3)

Differential Revision: https://phab.mercurial-scm.org/D5161
mercurial/branchmap.py
--- a/mercurial/branchmap.py	Thu Oct 18 16:36:10 2018 -0400
+++ b/mercurial/branchmap.py	Fri Oct 19 16:16:17 2018 +0200
@@ -278,7 +278,7 @@
         newbranches = {}
         getbranchinfo = repo.revbranchcache().branchinfo
         for r in revgen:
-            branch, closesbranch = getbranchinfo(r)
+            branch, closesbranch = getbranchinfo(r, changelog=cl)
             newbranches.setdefault(branch, []).append(r)
             if closesbranch:
                 self._closednodes.add(cl.node(r))
@@ -404,10 +404,10 @@
         self._rbcrevslen = len(self._repo.changelog)
         self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
 
-    def branchinfo(self, rev):
+    def branchinfo(self, rev, changelog=None):
         """Return branch name and close flag for rev, using and updating
         persistent cache."""
-        changelog = self._repo.changelog
+        changelog = changelog or self._repo.changelog
         rbcrevidx = rev * _rbcrecsize
 
         # avoid negative index, changelog.read(nullrev) is fast without cache
@@ -416,7 +416,7 @@
 
         # if requested rev isn't allocated, grow and cache the rev info
         if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
-            return self._branchinfo(rev)
+            return self._branchinfo(rev, changelog=changelog)
 
         # fast path: extract data from cache, use it if node is matching
         reponode = changelog.node(rev)[:_rbcnodelen]
@@ -444,11 +444,11 @@
             self._rbcrevslen = min(self._rbcrevslen, truncate)
 
         # fall back to slow path and make sure it will be written to disk
-        return self._branchinfo(rev)
+        return self._branchinfo(rev, changelog=changelog)
 
-    def _branchinfo(self, rev):
+    def _branchinfo(self, rev, changelog=None):
         """Retrieve branch info from changelog and update _rbcrevs"""
-        changelog = self._repo.changelog
+        changelog = changelog or self._repo.changelog
         b, close = changelog.branchinfo(rev)
         if b in self._namesreverse:
             branchidx = self._namesreverse[b]
@@ -459,7 +459,7 @@
         reponode = changelog.node(rev)
         if close:
             branchidx |= _rbccloseflag
-        self._setcachedata(rev, reponode, branchidx)
+        self._setcachedata(rev, reponode, branchidx, changelog)
         return b, close
 
     def setdata(self, branch, rev, node, close):
@@ -482,14 +482,16 @@
         if r'branchinfo' in vars(self):
             del self.branchinfo
 
-    def _setcachedata(self, rev, node, branchidx):
+    def _setcachedata(self, rev, node, branchidx, changelog=None):
         """Writes the node's branch data to the in-memory cache data."""
         if rev == nullrev:
             return
+
+        changelog = changelog or self._repo.changelog
         rbcrevidx = rev * _rbcrecsize
         if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
             self._rbcrevs.extend('\0' *
-                                 (len(self._repo.changelog) * _rbcrecsize -
+                                 (len(changelog) * _rbcrecsize -
                                   len(self._rbcrevs)))
         pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
         self._rbcrevslen = min(self._rbcrevslen, rev)