# HG changeset patch # User Pierre-Yves David # Date 1709805322 -3600 # Node ID f85f23f1479bd47684aee0e8b747e1aba81ab7a9 # Parent fc710c993ec992245883ef19fe8cccdf585fb495 branchcache: skip entries that are topological heads in the on disk file In the majority of cases, topological heads are also branch heads. We have efficient way to get the topological heads and efficient way to retrieve their branch information. So there is little value in putting them in the branch cache file explicitly. On the contrary, writing them explicitly tend to create very large cache file that are inefficient to read and update. So the branch cache v3 format is no longer including them. This changeset focus on the format aspect and have no focus on the performance aspect. We will cover that later. diff -r fc710c993ec9 -r f85f23f1479b mercurial/branchmap.py --- a/mercurial/branchmap.py Thu Mar 07 01:35:43 2024 +0100 +++ b/mercurial/branchmap.py Thu Mar 07 10:55:22 2024 +0100 @@ -594,7 +594,7 @@ filename = self._filename(repo) with repo.cachevfs(filename, b"w", atomictemp=True) as f: self._write_header(f) - nodecount = self._write_heads(f) + nodecount = self._write_heads(repo, f) repo.ui.log( b'branchcache', b'wrote %s with %d labels and %d nodes\n', @@ -613,7 +613,7 @@ def _write_header(self, fp) -> None: raise NotImplementedError - def _write_heads(self, fp) -> int: + def _write_heads(self, repo, fp) -> int: """write list of heads to a file Return the number of heads written.""" @@ -827,11 +827,27 @@ The open/closed state is represented by a single letter 'o' or 'c'. This field can be used to avoid changelog reads when determining if a branch head closes a branch or not. + + Topological heads are not included in the listing and should be dispatched + on the right branch at read time. Obsolete topological heads should be + ignored. """ _base_filename = b"branch3" _default_key_hashes = (None, None) + def _get_topo_heads(self, repo) -> List[int]: + """returns the topological head of a repoview content up to self.tiprev""" + cl = repo.changelog + if self.tiprev == nullrev: + return [] + elif self.tiprev == cl.tiprev(): + return cl.headrevs() + else: + # XXX passing tiprev as ceiling of cl.headrevs could be faster + heads = cl.headrevs(cl.revs(stop=self.tiprev)) + return heads + def _write_header(self, fp) -> None: cache_keys = { b"tip-node": hex(self.tipnode), @@ -845,6 +861,27 @@ pieces = (b"%s=%s" % i for i in sorted(cache_keys.items())) fp.write(b" ".join(pieces) + b'\n') + def _write_heads(self, repo, fp) -> int: + """write list of heads to a file + + Return the number of heads written.""" + nodecount = 0 + topo_heads = set(self._get_topo_heads(repo)) + to_rev = repo.changelog.index.rev + for label, nodes in sorted(self._entries.items()): + label = encoding.fromlocal(label) + for node in nodes: + rev = to_rev(node) + if rev in topo_heads: + continue + if node in self._closednodes: + state = b'c' + else: + state = b'o' + nodecount += 1 + fp.write(b"%s %s %s\n" % (hex(node), state, label)) + return nodecount + @classmethod def _load_header(cls, repo, lineiter): header_line = next(lineiter) @@ -869,6 +906,28 @@ args["key_hashes"] = (filtered_hash, obsolete_hash) return args + def _load_heads(self, repo, lineiter): + """fully loads the branchcache by reading from the file using the line + iterator passed""" + super()._load_heads(repo, lineiter) + cl = repo.changelog + getbranchinfo = repo.revbranchcache().branchinfo + obsrevs = obsolete.getrevs(repo, b'obsolete') + to_node = cl.node + touched_branch = set() + for head in self._get_topo_heads(repo): + if head in obsrevs: + continue + node = to_node(head) + branch, closed = getbranchinfo(head) + self._entries.setdefault(branch, []).append(node) + if closed: + self._closednodes.add(node) + touched_branch.add(branch) + to_rev = cl.index.rev + for branch in touched_branch: + self._entries[branch].sort(key=to_rev) + def _compute_key_hashes(self, repo) -> Tuple[bytes]: """return the cache key hashes that match this repoview state""" return scmutil.filtered_and_obsolete_hash( diff -r fc710c993ec9 -r f85f23f1479b mercurial/configitems.toml --- a/mercurial/configitems.toml Thu Mar 07 01:35:43 2024 +0100 +++ b/mercurial/configitems.toml Thu Mar 07 10:55:22 2024 +0100 @@ -720,6 +720,9 @@ default = "publish" +# The current implementation of the filtering/injecting of topological heads is +# naive and need proper benchmark and optimisation because we can envision +# moving the the v3 of the branch-cache format out of experimental [[items]] section = "experimental" name = "branch-cache-v3" diff -r fc710c993ec9 -r f85f23f1479b tests/test-branches-obsolete.t --- a/tests/test-branches-obsolete.t Thu Mar 07 01:35:43 2024 +0100 +++ b/tests/test-branches-obsolete.t Thu Mar 07 10:55:22 2024 +0100 @@ -158,8 +158,6 @@ $ show_cache ##### .hg/cache/branch3-served filtered-hash=f8006d64a10d35c011a5c5fa88be1e25c5929514 tip-node=7c29ff2453bf38c75ee8982935739103c38a9284 tip-rev=7 - 550bb31f072912453ccbb503de1d554616911e88 o default - 7c29ff2453bf38c75ee8982935739103c38a9284 o default #endif $ cd .. @@ -215,7 +213,6 @@ 7c29ff2453bf38c75ee8982935739103c38a9284 o default ##### .hg/cache/branch3-served filtered-hash=f8006d64a10d35c011a5c5fa88be1e25c5929514 obsolete-hash=ac5282439f301518f362f37547fcd52bcc670373 tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8 - 550bb31f072912453ccbb503de1d554616911e88 o default 7c29ff2453bf38c75ee8982935739103c38a9284 o default #endif @@ -236,7 +233,6 @@ $ show_cache ##### .hg/cache/branch3-served filtered-hash=f8006d64a10d35c011a5c5fa88be1e25c5929514 obsolete-hash=ac5282439f301518f362f37547fcd52bcc670373 tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8 - 550bb31f072912453ccbb503de1d554616911e88 o default 7c29ff2453bf38c75ee8982935739103c38a9284 o default #endif @@ -256,8 +252,6 @@ $ show_cache ##### .hg/cache/branch3-served filtered-hash=f8006d64a10d35c011a5c5fa88be1e25c5929514 tip-node=7c29ff2453bf38c75ee8982935739103c38a9284 tip-rev=7 - 550bb31f072912453ccbb503de1d554616911e88 o default - 7c29ff2453bf38c75ee8982935739103c38a9284 o default #endif $ cd .. @@ -310,7 +304,6 @@ ##### .hg/cache/branch3-served filtered-hash=f1456c0d675980582dda9b8edc7f13f503ce544f obsolete-hash=3e74f5349008671629e39d13d7e00d9ba94c74f7 tip-node=7c29ff2453bf38c75ee8982935739103c38a9284 tip-rev=7 550bb31f072912453ccbb503de1d554616911e88 o default - 7c29ff2453bf38c75ee8982935739103c38a9284 o default #endif Even when computing branches from scratch @@ -331,7 +324,6 @@ ##### .hg/cache/branch3-served filtered-hash=f1456c0d675980582dda9b8edc7f13f503ce544f obsolete-hash=3e74f5349008671629e39d13d7e00d9ba94c74f7 tip-node=7c29ff2453bf38c75ee8982935739103c38a9284 tip-rev=7 550bb31f072912453ccbb503de1d554616911e88 o default - 7c29ff2453bf38c75ee8982935739103c38a9284 o default #endif And we can get back to normal @@ -350,8 +342,6 @@ $ show_cache ##### .hg/cache/branch3-served filtered-hash=f8006d64a10d35c011a5c5fa88be1e25c5929514 tip-node=7c29ff2453bf38c75ee8982935739103c38a9284 tip-rev=7 - 550bb31f072912453ccbb503de1d554616911e88 o default - 7c29ff2453bf38c75ee8982935739103c38a9284 o default #endif $ cd .. @@ -406,7 +396,6 @@ $ show_cache ##### .hg/cache/branch3-served obsolete-hash=ac5282439f301518f362f37547fcd52bcc670373 tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8 - 63ba7cd843d1e95aac1a24435befeb1909c53619 o default 7c29ff2453bf38c75ee8982935739103c38a9284 o default #endif @@ -427,7 +416,6 @@ $ show_cache ##### .hg/cache/branch3-served obsolete-hash=ac5282439f301518f362f37547fcd52bcc670373 tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8 - 63ba7cd843d1e95aac1a24435befeb1909c53619 o default 7c29ff2453bf38c75ee8982935739103c38a9284 o default #endif @@ -447,8 +435,6 @@ $ show_cache ##### .hg/cache/branch3-served tip-node=7c29ff2453bf38c75ee8982935739103c38a9284 tip-rev=7 - 63ba7cd843d1e95aac1a24435befeb1909c53619 o default - 7c29ff2453bf38c75ee8982935739103c38a9284 o default #endif $ cd .. @@ -492,8 +478,6 @@ $ show_cache ##### .hg/cache/branch3-served tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8 - 63ba7cd843d1e95aac1a24435befeb1909c53619 o default - 3d808bbc94408ea19da905596d4079357a1f28be o default #endif $ hg pull --rev `cat ../main-single-branch-node_B4` --remote-hidden @@ -518,7 +502,6 @@ ##### .hg/cache/branch3-served filtered-hash=f1456c0d675980582dda9b8edc7f13f503ce544f obsolete-hash=3e74f5349008671629e39d13d7e00d9ba94c74f7 tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8 550bb31f072912453ccbb503de1d554616911e88 o default - 3d808bbc94408ea19da905596d4079357a1f28be o default #endif Even when computing branches from scratch @@ -539,7 +522,6 @@ ##### .hg/cache/branch3-served filtered-hash=f1456c0d675980582dda9b8edc7f13f503ce544f obsolete-hash=3e74f5349008671629e39d13d7e00d9ba94c74f7 tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8 550bb31f072912453ccbb503de1d554616911e88 o default - 3d808bbc94408ea19da905596d4079357a1f28be o default #endif And we can get back to normal @@ -558,8 +540,6 @@ $ show_cache ##### .hg/cache/branch3-served filtered-hash=f8006d64a10d35c011a5c5fa88be1e25c5929514 tip-node=3d808bbc94408ea19da905596d4079357a1f28be tip-rev=8 - 550bb31f072912453ccbb503de1d554616911e88 o default - 3d808bbc94408ea19da905596d4079357a1f28be o default #endif $ cd .. diff -r fc710c993ec9 -r f85f23f1479b tests/test-branches.t --- a/tests/test-branches.t Thu Mar 07 01:35:43 2024 +0100 +++ b/tests/test-branches.t Thu Mar 07 10:55:22 2024 +0100 @@ -918,9 +918,18 @@ $ rm .hg/cache/rbc* $ hg log -r "5 & branch(5)" -T "{rev}\n" 5 + +(here v3 is querying branch info for heads so it warm much more of the cache) + +#if v2 $ f --size .hg/cache/rbc-* .hg/cache/rbc-names-v1: size=1 .hg/cache/rbc-revs-v1: size=48 +#else + $ f --size .hg/cache/rbc-* + .hg/cache/rbc-names-v1: size=84 + .hg/cache/rbc-revs-v1: size=152 +#endif $ cd .. @@ -1304,7 +1313,6 @@ #if v3 $ cat branchmap-update-01/.hg/cache/branch3-base tip-node=99ba08759bc7f6fdbe5304e83d0387f35c082479 tip-rev=1 - 99ba08759bc7f6fdbe5304e83d0387f35c082479 o A #else $ cat branchmap-update-01/.hg/cache/branch2-base 99ba08759bc7f6fdbe5304e83d0387f35c082479 1 @@ -1320,7 +1328,6 @@ #if v3 $ cat branchmap-update-01/.hg/cache/branch3-served tip-node=71ca9a6d524ed3c2a215119b2086ac3b8c4c8286 tip-rev=3 - 71ca9a6d524ed3c2a215119b2086ac3b8c4c8286 o A #else $ cat branchmap-update-01/.hg/cache/branch2-served 71ca9a6d524ed3c2a215119b2086ac3b8c4c8286 3 @@ -1350,7 +1357,6 @@ #if v3 $ cat branchmap-update-02/.hg/cache/branch3-base tip-node=99ba08759bc7f6fdbe5304e83d0387f35c082479 tip-rev=1 - 99ba08759bc7f6fdbe5304e83d0387f35c082479 o A #else $ cat branchmap-update-02/.hg/cache/branch2-base 99ba08759bc7f6fdbe5304e83d0387f35c082479 1 @@ -1367,7 +1373,6 @@ #if v3 $ cat branchmap-update-02/.hg/cache/branch3-base tip-node=99ba08759bc7f6fdbe5304e83d0387f35c082479 tip-rev=1 - 99ba08759bc7f6fdbe5304e83d0387f35c082479 o A #else $ cat branchmap-update-02/.hg/cache/branch2-base 99ba08759bc7f6fdbe5304e83d0387f35c082479 1