sparse-revlog: skip the span check in the sparse-revlog case
authorBoris Feld <boris.feld@octobus.net>
Mon, 15 Oct 2018 15:45:08 +0200
changeset 40603 2f7e531ef3e7
parent 40602 c36175456350
child 40604 3ac23dad6364
sparse-revlog: skip the span check in the sparse-revlog case This significantly improves the performance on unbundling on smaller repositories. Mercurial: unbundling 1K revisions no-sparse-revlog: 500 ms sparse-revlog-before: 689 ms sparse-revlog-after: 484 ms Pypy: unbundling 1K revisions no-sparse-revlog: 1.242 s sparse-revlog-before: 1.135 s sparse-revlog-after: 0.860 s NetBeans: unbundling 1K revisions no-sparse-revlog: 1.386 s sparse-revlog-before: 2.368 s sparse-revlog-after: 1.191 s Mozilla: unbundling 1K revisions no-sparse-revlog: 3.103 s sparse-revlog-before: 3.367 s sparse-revlog-after: 3.093 s
mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py	Wed Oct 17 17:15:42 2018 -0400
+++ b/mercurial/revlogutils/deltas.py	Mon Oct 15 15:45:08 2018 +0200
@@ -489,45 +489,23 @@
     #   deltas we need to apply -- bounding it limits the amount of CPU
     #   we consume.
 
-    if revlog._sparserevlog:
-        # As sparse-read will be used, we can consider that the distance,
-        # instead of being the span of the whole chunk,
-        # is the span of the largest read chunk
-        base = deltainfo.base
-
-        if base != nullrev:
-            deltachain = revlog._deltachain(base)[0]
-        else:
-            deltachain = []
-
-        # search for the first non-snapshot revision
-        for idx, r in enumerate(deltachain):
-            if not revlog.issnapshot(r):
-                break
-        deltachain = deltachain[idx:]
-        chunks = slicechunk(revlog, deltachain, deltainfo)
-        all_span = [segmentspan(revlog, revs, deltainfo)
-                    for revs in chunks]
-        distance = max(all_span)
-    else:
-        distance = deltainfo.distance
-
     textlen = revinfo.textlen
     defaultmax = textlen * 4
     maxdist = revlog._maxdeltachainspan
     if not maxdist:
-        maxdist = distance # ensure the conditional pass
+        maxdist = deltainfo.distance # ensure the conditional pass
     maxdist = max(maxdist, defaultmax)
-    if revlog._sparserevlog and maxdist < revlog._srmingapsize:
-        # In multiple place, we are ignoring irrelevant data range below a
-        # certain size. Be also apply this tradeoff here and relax span
-        # constraint for small enought content.
-        maxdist = revlog._srmingapsize
 
     # Bad delta from read span:
     #
     #   If the span of data read is larger than the maximum allowed.
-    if maxdist < distance:
+    #
+    #   In the sparse-revlog case, we rely on the associated "sparse reading"
+    #   to avoid issue related to the span of data. In theory, it would be
+    #   possible to build pathological revlog where delta pattern would lead
+    #   to too many reads. However, they do not happen in practice at all. So
+    #   we skip the span check entirely.
+    if not revlog._sparserevlog and maxdist < deltainfo.distance:
         return False
 
     # Bad delta from new delta size: