branchcache: skip entries that are topological heads in the on disk file
authorPierre-Yves David <pierre-yves.david@octobus.net>
Thu, 07 Mar 2024 10:55:22 +0100
changeset 51531 f85f23f1479b
parent 51530 fc710c993ec9
child 51532 a0ef462cf1a4
branchcache: skip entries that are topological heads in the on disk file In the majority of cases, topological heads are also branch heads. We have efficient way to get the topological heads and efficient way to retrieve their branch information. So there is little value in putting them in the branch cache file explicitly. On the contrary, writing them explicitly tend to create very large cache file that are inefficient to read and update. So the branch cache v3 format is no longer including them. This changeset focus on the format aspect and have no focus on the performance aspect. We will cover that later.
mercurial/branchmap.py
mercurial/configitems.toml
tests/test-branches-obsolete.t
tests/test-branches.t
--- a/mercurial/branchmap.py	Thu Mar 07 01:35:43 2024 +0100
+++ b/mercurial/branchmap.py	Thu Mar 07 10:55:22 2024 +0100
@@ -594,7 +594,7 @@
             filename = self._filename(repo)
             with repo.cachevfs(filename, b"w", atomictemp=True) as f:
                 self._write_header(f)
-                nodecount = self._write_heads(f)
+                nodecount = self._write_heads(repo, f)
             repo.ui.log(
                 b'branchcache',
                 b'wrote %s with %d labels and %d nodes\n',
@@ -613,7 +613,7 @@
     def _write_header(self, fp) -> None:
         raise NotImplementedError
 
-    def _write_heads(self, fp) -> int:
+    def _write_heads(self, repo, fp) -> int:
         """write list of heads to a file
 
         Return the number of heads written."""
@@ -827,11 +827,27 @@
     The open/closed state is represented by a single letter 'o' or 'c'.
     This field can be used to avoid changelog reads when determining if a
     branch head closes a branch or not.
+
+    Topological heads are not included in the listing and should be dispatched
+    on the right branch at read time. Obsolete topological heads should be
+    ignored.
     """
 
     _base_filename = b"branch3"
     _default_key_hashes = (None, None)
 
+    def _get_topo_heads(self, repo) -> List[int]:
+        """returns the topological head of a repoview content up to self.tiprev"""
+        cl = repo.changelog
+        if self.tiprev == nullrev:
+            return []
+        elif self.tiprev == cl.tiprev():
+            return cl.headrevs()
+        else:
+            # XXX passing tiprev as ceiling of cl.headrevs could be faster
+            heads = cl.headrevs(cl.revs(stop=self.tiprev))
+            return heads
+
     def _write_header(self, fp) -> None:
         cache_keys = {
             b"tip-node": hex(self.tipnode),
@@ -845,6 +861,27 @@
         pieces = (b"%s=%s" % i for i in sorted(cache_keys.items()))
         fp.write(b" ".join(pieces) + b'\n')
 
+    def _write_heads(self, repo, fp) -> int:
+        """write list of heads to a file
+
+        Return the number of heads written."""
+        nodecount = 0
+        topo_heads = set(self._get_topo_heads(repo))
+        to_rev = repo.changelog.index.rev
+        for label, nodes in sorted(self._entries.items()):
+            label = encoding.fromlocal(label)
+            for node in nodes:
+                rev = to_rev(node)
+                if rev in topo_heads:
+                    continue
+                if node in self._closednodes:
+                    state = b'c'
+                else:
+                    state = b'o'
+                nodecount += 1
+                fp.write(b"%s %s %s\n" % (hex(node), state, label))
+        return nodecount
+
     @classmethod
     def _load_header(cls, repo, lineiter):
         header_line = next(lineiter)
@@ -869,6 +906,28 @@
         args["key_hashes"] = (filtered_hash, obsolete_hash)
         return args
 
+    def _load_heads(self, repo, lineiter):
+        """fully loads the branchcache by reading from the file using the line
+        iterator passed"""
+        super()._load_heads(repo, lineiter)
+        cl = repo.changelog
+        getbranchinfo = repo.revbranchcache().branchinfo
+        obsrevs = obsolete.getrevs(repo, b'obsolete')
+        to_node = cl.node
+        touched_branch = set()
+        for head in self._get_topo_heads(repo):
+            if head in obsrevs:
+                continue
+            node = to_node(head)
+            branch, closed = getbranchinfo(head)
+            self._entries.setdefault(branch, []).append(node)
+            if closed:
+                self._closednodes.add(node)
+            touched_branch.add(branch)
+        to_rev = cl.index.rev
+        for branch in touched_branch:
+            self._entries[branch].sort(key=to_rev)
+
     def _compute_key_hashes(self, repo) -> Tuple[bytes]:
         """return the cache key hashes that match this repoview state"""
         return scmutil.filtered_and_obsolete_hash(
--- a/mercurial/configitems.toml	Thu Mar 07 01:35:43 2024 +0100
+++ b/mercurial/configitems.toml	Thu Mar 07 10:55:22 2024 +0100
@@ -720,6 +720,9 @@
 default = "publish"
 
 
+# The current implementation of the filtering/injecting of topological heads is
+# naive and need proper benchmark and optimisation because we can envision
+# moving the the v3 of the branch-cache format out of experimental
 [[items]]
 section = "experimental"
 name = "branch-cache-v3"
--- a/tests/test-branches-obsolete.t	Thu Mar 07 01:35:43 2024 +0100
+++ b/tests/test-branches-obsolete.t	Thu Mar 07 10:55:22 2024 +0100
@@ -158,8 +158,6 @@
   $ show_cache
   ##### .hg/cache/branch3-served
   filtered-hash=f8006d64a10d35c011a5c5fa88be1e25c5929514 tip-node=7c29ff2453bf38c75ee8982935739103c38a9284 tip-rev=7
-  550bb31f072912453ccbb503de1d554616911e88 o default
-  7c29ff2453bf38c75ee8982935739103c38a9284 o default
 #endif
   $ cd ..
 
@@ -215,7 +213,6 @@
   7c29ff2453bf38c75ee8982935739103c38a9284 o default
   ##### .hg/cache/branch3-served
   filtered-hash=f8006d64a10d35c011a5c5fa88be1e25c5929514 obsolete-hash=ac5282439f301518f362f37547fcd52bcc670373 tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8
-  550bb31f072912453ccbb503de1d554616911e88 o default
   7c29ff2453bf38c75ee8982935739103c38a9284 o default
 #endif
 
@@ -236,7 +233,6 @@
   $ show_cache
   ##### .hg/cache/branch3-served
   filtered-hash=f8006d64a10d35c011a5c5fa88be1e25c5929514 obsolete-hash=ac5282439f301518f362f37547fcd52bcc670373 tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8
-  550bb31f072912453ccbb503de1d554616911e88 o default
   7c29ff2453bf38c75ee8982935739103c38a9284 o default
 #endif
 
@@ -256,8 +252,6 @@
   $ show_cache
   ##### .hg/cache/branch3-served
   filtered-hash=f8006d64a10d35c011a5c5fa88be1e25c5929514 tip-node=7c29ff2453bf38c75ee8982935739103c38a9284 tip-rev=7
-  550bb31f072912453ccbb503de1d554616911e88 o default
-  7c29ff2453bf38c75ee8982935739103c38a9284 o default
 #endif
 
   $ cd ..
@@ -310,7 +304,6 @@
   ##### .hg/cache/branch3-served
   filtered-hash=f1456c0d675980582dda9b8edc7f13f503ce544f obsolete-hash=3e74f5349008671629e39d13d7e00d9ba94c74f7 tip-node=7c29ff2453bf38c75ee8982935739103c38a9284 tip-rev=7
   550bb31f072912453ccbb503de1d554616911e88 o default
-  7c29ff2453bf38c75ee8982935739103c38a9284 o default
 #endif
 
 Even when computing branches from scratch
@@ -331,7 +324,6 @@
   ##### .hg/cache/branch3-served
   filtered-hash=f1456c0d675980582dda9b8edc7f13f503ce544f obsolete-hash=3e74f5349008671629e39d13d7e00d9ba94c74f7 tip-node=7c29ff2453bf38c75ee8982935739103c38a9284 tip-rev=7
   550bb31f072912453ccbb503de1d554616911e88 o default
-  7c29ff2453bf38c75ee8982935739103c38a9284 o default
 #endif
 
 And we can get back to normal
@@ -350,8 +342,6 @@
   $ show_cache
   ##### .hg/cache/branch3-served
   filtered-hash=f8006d64a10d35c011a5c5fa88be1e25c5929514 tip-node=7c29ff2453bf38c75ee8982935739103c38a9284 tip-rev=7
-  550bb31f072912453ccbb503de1d554616911e88 o default
-  7c29ff2453bf38c75ee8982935739103c38a9284 o default
 #endif
 
   $ cd ..
@@ -406,7 +396,6 @@
   $ show_cache
   ##### .hg/cache/branch3-served
   obsolete-hash=ac5282439f301518f362f37547fcd52bcc670373 tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8
-  63ba7cd843d1e95aac1a24435befeb1909c53619 o default
   7c29ff2453bf38c75ee8982935739103c38a9284 o default
 #endif
 
@@ -427,7 +416,6 @@
   $ show_cache
   ##### .hg/cache/branch3-served
   obsolete-hash=ac5282439f301518f362f37547fcd52bcc670373 tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8
-  63ba7cd843d1e95aac1a24435befeb1909c53619 o default
   7c29ff2453bf38c75ee8982935739103c38a9284 o default
 #endif
 
@@ -447,8 +435,6 @@
   $ show_cache
   ##### .hg/cache/branch3-served
   tip-node=7c29ff2453bf38c75ee8982935739103c38a9284 tip-rev=7
-  63ba7cd843d1e95aac1a24435befeb1909c53619 o default
-  7c29ff2453bf38c75ee8982935739103c38a9284 o default
 #endif
 
   $ cd ..
@@ -492,8 +478,6 @@
   $ show_cache
   ##### .hg/cache/branch3-served
   tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8
-  63ba7cd843d1e95aac1a24435befeb1909c53619 o default
-  3d808bbc94408ea19da905596d4079357a1f28be o default
 #endif
 
   $ hg pull --rev `cat ../main-single-branch-node_B4` --remote-hidden
@@ -518,7 +502,6 @@
   ##### .hg/cache/branch3-served
   filtered-hash=f1456c0d675980582dda9b8edc7f13f503ce544f obsolete-hash=3e74f5349008671629e39d13d7e00d9ba94c74f7 tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8
   550bb31f072912453ccbb503de1d554616911e88 o default
-  3d808bbc94408ea19da905596d4079357a1f28be o default
 #endif
 
 Even when computing branches from scratch
@@ -539,7 +522,6 @@
   ##### .hg/cache/branch3-served
   filtered-hash=f1456c0d675980582dda9b8edc7f13f503ce544f obsolete-hash=3e74f5349008671629e39d13d7e00d9ba94c74f7 tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8
   550bb31f072912453ccbb503de1d554616911e88 o default
-  3d808bbc94408ea19da905596d4079357a1f28be o default
 #endif
 
 And we can get back to normal
@@ -558,8 +540,6 @@
   $ show_cache
   ##### .hg/cache/branch3-served
   filtered-hash=f8006d64a10d35c011a5c5fa88be1e25c5929514 tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8
-  550bb31f072912453ccbb503de1d554616911e88 o default
-  3d808bbc94408ea19da905596d4079357a1f28be o default
 #endif
 
   $ cd ..
--- a/tests/test-branches.t	Thu Mar 07 01:35:43 2024 +0100
+++ b/tests/test-branches.t	Thu Mar 07 10:55:22 2024 +0100
@@ -918,9 +918,18 @@
   $ rm .hg/cache/rbc*
   $ hg log -r "5 & branch(5)" -T "{rev}\n"
   5
+
+(here v3 is querying branch info for heads so it warm much more of the cache)
+
+#if v2
   $ f --size .hg/cache/rbc-*
   .hg/cache/rbc-names-v1: size=1
   .hg/cache/rbc-revs-v1: size=48
+#else
+  $ f --size .hg/cache/rbc-*
+  .hg/cache/rbc-names-v1: size=84
+  .hg/cache/rbc-revs-v1: size=152
+#endif
 
   $ cd ..
 
@@ -1304,7 +1313,6 @@
 #if v3
   $ cat branchmap-update-01/.hg/cache/branch3-base
   tip-node=99ba08759bc7f6fdbe5304e83d0387f35c082479 tip-rev=1
-  99ba08759bc7f6fdbe5304e83d0387f35c082479 o A
 #else
   $ cat branchmap-update-01/.hg/cache/branch2-base
   99ba08759bc7f6fdbe5304e83d0387f35c082479 1
@@ -1320,7 +1328,6 @@
 #if v3
   $ cat branchmap-update-01/.hg/cache/branch3-served
   tip-node=71ca9a6d524ed3c2a215119b2086ac3b8c4c8286 tip-rev=3
-  71ca9a6d524ed3c2a215119b2086ac3b8c4c8286 o A
 #else
   $ cat branchmap-update-01/.hg/cache/branch2-served
   71ca9a6d524ed3c2a215119b2086ac3b8c4c8286 3
@@ -1350,7 +1357,6 @@
 #if v3
   $ cat branchmap-update-02/.hg/cache/branch3-base
   tip-node=99ba08759bc7f6fdbe5304e83d0387f35c082479 tip-rev=1
-  99ba08759bc7f6fdbe5304e83d0387f35c082479 o A
 #else
   $ cat branchmap-update-02/.hg/cache/branch2-base
   99ba08759bc7f6fdbe5304e83d0387f35c082479 1
@@ -1367,7 +1373,6 @@
 #if v3
   $ cat branchmap-update-02/.hg/cache/branch3-base
   tip-node=99ba08759bc7f6fdbe5304e83d0387f35c082479 tip-rev=1
-  99ba08759bc7f6fdbe5304e83d0387f35c082479 o A
 #else
   $ cat branchmap-update-02/.hg/cache/branch2-base
   99ba08759bc7f6fdbe5304e83d0387f35c082479 1