snapshot: consider all snapshots in the parents' chains
authorBoris Feld <boris.feld@octobus.net>
Fri, 07 Sep 2018 11:17:30 -0400
changeset 39494 e72130f58f5d
parent 39493 3ca144f1c8dd
child 39495 6a53842727c1
snapshot: consider all snapshots in the parents' chains There are no reasons to only consider full snapshot as a possible base for an intermediate snapshot. Now that the basic principles have been set, we can start adding more levels of snapshots. We now consider all snapshots in the parent's chains (full or intermediate). This creates a chain of intermediate snapshots, each smaller than the previous one. # Effect On The Test Repository In the test repository, we can see a decrease in the revlog size and slightly shorter delta chain. However, that approach creates snapshots more frequently, increasing the risk of ending into problematic cases in very branchy repositories (not triggered by the test repository). The next changesets will remove that risk by adding logic that increases deltas reuse.
mercurial/revlogutils/deltas.py
tests/test-sparse-revlog.t
--- a/mercurial/revlogutils/deltas.py	Fri Sep 07 11:17:30 2018 -0400
+++ b/mercurial/revlogutils/deltas.py	Fri Sep 07 11:17:30 2018 -0400
@@ -661,12 +661,23 @@
             yield parents
 
     if sparse and parents:
+        snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
         # See if we can use an existing snapshot in the parent chains to use as
         # a base for a new intermediate-snapshot
-        bases = []
+        #
+        # search for snapshot in parents delta chain
+        # map: snapshot-level: snapshot-rev
+        parents_snaps = collections.defaultdict(set)
         for p in parents:
-            bases.append(deltachain(p)[0])
-        yield tuple(sorted(bases))
+            for idx, s in enumerate(deltachain(p)):
+                if not revlog.issnapshot(s):
+                    break
+                parents_snaps[idx].add(s)
+        # Test them as possible intermediate snapshot base
+        # We test them from highest to lowest level. High level one are more
+        # likely to result in small delta
+        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
+            yield tuple(sorted(snaps))
         # No suitable base found in the parent chain, search if any full
         # snapshots emitted since parent's base would be a suitable base for an
         # intermediate snapshot.
@@ -675,8 +686,7 @@
         # revisions instead of starting our own. Without such re-use,
         # topological branches would keep reopening new full chains. Creating
         # more and more snapshot as the repository grow.
-        snapfloor = min(bases) + 1
-        snapshots = collections.defaultdict(list)
+        snapfloor = min(parents_snaps[0]) + 1
         _findsnapshots(revlog, snapshots, snapfloor)
         yield tuple(snapshots[nullrev])
 
--- a/tests/test-sparse-revlog.t	Fri Sep 07 11:17:30 2018 -0400
+++ b/tests/test-sparse-revlog.t	Fri Sep 07 11:17:30 2018 -0400
@@ -77,7 +77,7 @@
   
 
   $ f -s .hg/store/data/*.d
-  .hg/store/data/_s_p_a_r_s_e-_r_e_v_l_o_g-_t_e_s_t-_f_i_l_e.d: size=67810463
+  .hg/store/data/_s_p_a_r_s_e-_r_e_v_l_o_g-_t_e_s_t-_f_i_l_e.d: size=63812493
   $ hg debugrevlog *
   format : 1
   flags  : generaldelta
@@ -89,39 +89,45 @@
       empty     :        0 ( 0.00%)
                      text  :        0 (100.00%)
                      delta :        0 (100.00%)
-      snapshot  :      126 ( 2.52%)
+      snapshot  :      179 ( 3.58%)
         lvl-0   :              4 ( 0.08%)
-        lvl-1   :            120 ( 2.40%)
-        lvl-2   :              2 ( 0.04%)
-      deltas    :     4875 (97.48%)
-  revision size : 67810463
-      snapshot  : 14373347 (21.20%)
-        lvl-0   :         804235 ( 1.19%)
-        lvl-1   :       13535903 (19.96%)
-        lvl-2   :          33209 ( 0.05%)
-      deltas    : 53437116 (78.80%)
+        lvl-1   :             49 ( 0.98%)
+        lvl-2   :             56 ( 1.12%)
+        lvl-3   :             63 ( 1.26%)
+        lvl-4   :              7 ( 0.14%)
+      deltas    :     4822 (96.42%)
+  revision size : 63812493
+      snapshot  : 10745878 (16.84%)
+        lvl-0   :         804204 ( 1.26%)
+        lvl-1   :        4986508 ( 7.81%)
+        lvl-2   :        2858810 ( 4.48%)
+        lvl-3   :        1958906 ( 3.07%)
+        lvl-4   :         137450 ( 0.22%)
+      deltas    : 53066615 (83.16%)
   
   chunks        :     5001
       0x78 (x)  :     5001 (100.00%)
-  chunks size   : 67810463
-      0x78 (x)  : 67810463 (100.00%)
+  chunks size   : 63812493
+      0x78 (x)  : 63812493 (100.00%)
   
-  avg chain length  :       18
+  avg chain length  :       17
   max chain length  :       45
-  max chain reach   : 25808240
-  compression ratio :       25
+  max chain reach   : 27506743
+  compression ratio :       27
   
   uncompressed data size (min/max/avg) : 346468 / 346472 / 346471
-  full revision size (min/max/avg)     : 201014 / 201116 / 201058
-  inter-snapshot size (min/max/avg)    : 11623 / 173150 / 111222
-      level-1   (min/max/avg)          : 11623 / 173150 / 112799
-      level-2   (min/max/avg)          : 14151 / 19058 / 16604
-  delta size (min/max/avg)             : 10649 / 101790 / 10961
+  full revision size (min/max/avg)     : 201007 / 201077 / 201051
+  inter-snapshot size (min/max/avg)    : 13223 / 172097 / 56809
+      level-1   (min/max/avg)          : 13223 / 172097 / 101765
+      level-2   (min/max/avg)          : 28558 / 85237 / 51050
+      level-3   (min/max/avg)          : 14647 / 41752 / 31093
+      level-4   (min/max/avg)          : 18596 / 20760 / 19635
+  delta size (min/max/avg)             : 10649 / 104791 / 11005
   
-  deltas against prev  : 4207 (86.30%)
-      where prev = p1  : 4164     (98.98%)
+  deltas against prev  : 4171 (86.50%)
+      where prev = p1  : 4127     (98.95%)
       where prev = p2  :    0     ( 0.00%)
-      other            :   43     ( 1.02%)
-  deltas against p1    :  653 (13.39%)
-  deltas against p2    :   15 ( 0.31%)
+      other            :   44     ( 1.05%)
+  deltas against p1    :  633 (13.13%)
+  deltas against p2    :   18 ( 0.37%)
   deltas against other :    0 ( 0.00%)