# HG changeset patch # User Boris Feld # Date 1536333451 14400 # Node ID 6a53842727c1c05b6d0a9bd2f85145adf7b0e846 # Parent e72130f58f5d22445f96bb5304e80444ff097a9c snapshot: consider unrelated snapshots at a similar level first This new step is inserted before considering using a level-N snapshot as a base for a level-N+1 snapshot. We first check if existing level-N+1 snapshots using the same base would be a suitable base for a level-N+2 snapshot. This increases snapshot reuse and limits the risk of snapshot explosion in very branchy repositories. Using a "deeper" snapshot as the base also results in a smaller snapshot since it builds a level-N+2 intermediate snapshot instead of an N+1 one. This logic is similar for the one we added in a previous commit. In that previous commit is only applied to level-0 "siblings". We can see this effect in the test repository. Snapshots moved from lower levels to higher levels. diff -r e72130f58f5d -r 6a53842727c1 mercurial/revlogutils/deltas.py --- a/mercurial/revlogutils/deltas.py Fri Sep 07 11:17:30 2018 -0400 +++ b/mercurial/revlogutils/deltas.py Fri Sep 07 11:17:31 2018 -0400 @@ -616,9 +616,10 @@ def _findsnapshots(revlog, cache, start_rev): """find snapshot from start_rev to tip""" deltaparent = revlog.deltaparent + issnapshot = revlog.issnapshot for rev in revlog.revs(start_rev): - if deltaparent(rev) == nullrev: - cache[nullrev].append(rev) + if issnapshot(rev): + cache[deltaparent(rev)].append(rev) def _rawgroups(revlog, p1, p2, cachedelta): """Provides group of revision to be tested as delta base @@ -673,11 +674,35 @@ if not revlog.issnapshot(s): break parents_snaps[idx].add(s) + snapfloor = min(parents_snaps[0]) + 1 + _findsnapshots(revlog, snapshots, snapfloor) # Test them as possible intermediate snapshot base # We test them from highest to lowest level. High level one are more # likely to result in small delta + floor = None for idx, snaps in sorted(parents_snaps.items(), reverse=True): + siblings = set() + for s in snaps: + siblings.update(snapshots[s]) + # Before considering making a new intermediate snapshot, we check + # if an existing snapshot, children of base we consider, would be + # suitable. + # + # It give a change to reuse a delta chain "unrelated" to the + # current revision instead of starting our own. Without such + # re-use, topological branches would keep reopening new chains. + # Creating more and more snapshot as the repository grow. + + if floor is not None: + # We only do this for siblings created after the one in our + # parent's delta chain. Those created before has less chances + # to be valid base since our ancestors had to create a new + # snapshot. + siblings = [r for r in siblings if floor < r] + yield tuple(sorted(siblings)) + # then test the base from our parent's delta chain. yield tuple(sorted(snaps)) + floor = min(snaps) # No suitable base found in the parent chain, search if any full # snapshots emitted since parent's base would be a suitable base for an # intermediate snapshot. @@ -686,8 +711,6 @@ # revisions instead of starting our own. Without such re-use, # topological branches would keep reopening new full chains. Creating # more and more snapshot as the repository grow. - snapfloor = min(parents_snaps[0]) + 1 - _findsnapshots(revlog, snapshots, snapfloor) yield tuple(snapshots[nullrev]) # other approach failed try against prev to hopefully save us a diff -r e72130f58f5d -r 6a53842727c1 tests/test-sparse-revlog.t --- a/tests/test-sparse-revlog.t Fri Sep 07 11:17:30 2018 -0400 +++ b/tests/test-sparse-revlog.t Fri Sep 07 11:17:31 2018 -0400 @@ -77,7 +77,7 @@ $ f -s .hg/store/data/*.d - .hg/store/data/_s_p_a_r_s_e-_r_e_v_l_o_g-_t_e_s_t-_f_i_l_e.d: size=63812493 + .hg/store/data/_s_p_a_r_s_e-_r_e_v_l_o_g-_t_e_s_t-_f_i_l_e.d: size=59303048 $ hg debugrevlog * format : 1 flags : generaldelta @@ -89,45 +89,45 @@ empty : 0 ( 0.00%) text : 0 (100.00%) delta : 0 (100.00%) - snapshot : 179 ( 3.58%) + snapshot : 165 ( 3.30%) lvl-0 : 4 ( 0.08%) - lvl-1 : 49 ( 0.98%) - lvl-2 : 56 ( 1.12%) - lvl-3 : 63 ( 1.26%) - lvl-4 : 7 ( 0.14%) - deltas : 4822 (96.42%) - revision size : 63812493 - snapshot : 10745878 (16.84%) - lvl-0 : 804204 ( 1.26%) - lvl-1 : 4986508 ( 7.81%) - lvl-2 : 2858810 ( 4.48%) - lvl-3 : 1958906 ( 3.07%) - lvl-4 : 137450 ( 0.22%) - deltas : 53066615 (83.16%) + lvl-1 : 17 ( 0.34%) + lvl-2 : 46 ( 0.92%) + lvl-3 : 62 ( 1.24%) + lvl-4 : 36 ( 0.72%) + deltas : 4836 (96.70%) + revision size : 59303048 + snapshot : 6105443 (10.30%) + lvl-0 : 804187 ( 1.36%) + lvl-1 : 1476228 ( 2.49%) + lvl-2 : 1752567 ( 2.96%) + lvl-3 : 1461776 ( 2.46%) + lvl-4 : 610685 ( 1.03%) + deltas : 53197605 (89.70%) chunks : 5001 0x78 (x) : 5001 (100.00%) - chunks size : 63812493 - 0x78 (x) : 63812493 (100.00%) + chunks size : 59303048 + 0x78 (x) : 59303048 (100.00%) avg chain length : 17 max chain length : 45 - max chain reach : 27506743 - compression ratio : 27 + max chain reach : 26194433 + compression ratio : 29 uncompressed data size (min/max/avg) : 346468 / 346472 / 346471 - full revision size (min/max/avg) : 201007 / 201077 / 201051 - inter-snapshot size (min/max/avg) : 13223 / 172097 / 56809 - level-1 (min/max/avg) : 13223 / 172097 / 101765 - level-2 (min/max/avg) : 28558 / 85237 / 51050 - level-3 (min/max/avg) : 14647 / 41752 / 31093 - level-4 (min/max/avg) : 18596 / 20760 / 19635 - delta size (min/max/avg) : 10649 / 104791 / 11005 + full revision size (min/max/avg) : 200992 / 201080 / 201046 + inter-snapshot size (min/max/avg) : 11610 / 172762 / 32927 + level-1 (min/max/avg) : 15619 / 172762 / 86836 + level-2 (min/max/avg) : 13055 / 85219 / 38099 + level-3 (min/max/avg) : 11610 / 42645 / 23577 + level-4 (min/max/avg) : 12928 / 20205 / 16963 + delta size (min/max/avg) : 10649 / 106863 / 11000 - deltas against prev : 4171 (86.50%) - where prev = p1 : 4127 (98.95%) + deltas against prev : 4162 (86.06%) + where prev = p1 : 4120 (98.99%) where prev = p2 : 0 ( 0.00%) - other : 44 ( 1.05%) - deltas against p1 : 633 (13.13%) - deltas against p2 : 18 ( 0.37%) + other : 42 ( 1.01%) + deltas against p1 : 653 (13.50%) + deltas against p2 : 21 ( 0.43%) deltas against other : 0 ( 0.00%)