sparse-revlog: stop using a heap to track gaps
authorBoris Feld <boris.feld@octobus.net>
Thu, 08 Nov 2018 16:01:30 +0100
changeset 40607 54de23400b2a
parent 40606 bfbfd15d65bd
child 40608 526ee887c4d5
sparse-revlog: stop using a heap to track gaps The heap doesn't bring any performance advantage as we can simply sort the final list. Moreover, the lesser complexity helps a lot when we later implement it in C.
mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py	Thu Nov 08 15:29:58 2018 +0100
+++ b/mercurial/revlogutils/deltas.py	Thu Nov 08 16:01:30 2018 +0100
@@ -275,8 +275,7 @@
         return
 
     # Store the gaps in a heap to have them sorted by decreasing size
-    gapsheap = []
-    heapq.heapify(gapsheap)
+    gaps = []
     prevend = None
     for i, rev in enumerate(revs):
         revstart = start(rev)
@@ -290,21 +289,23 @@
             gapsize = revstart - prevend
             # only consider holes that are large enough
             if gapsize > mingapsize:
-                heapq.heappush(gapsheap, (-gapsize, i))
+                gaps.append((gapsize, i))
 
         prevend = revstart + revlen
+    # sort the gaps to pop them from largest to small
+    gaps.sort()
 
     # Collect the indices of the largest holes until the density is acceptable
     indicesheap = []
     heapq.heapify(indicesheap)
-    while gapsheap and density < targetdensity:
-        oppgapsize, gapidx = heapq.heappop(gapsheap)
+    while gaps and density < targetdensity:
+        gapsize, gapidx = gaps.pop()
 
         heapq.heappush(indicesheap, gapidx)
 
         # the gap sizes are stored as negatives to be sorted decreasingly
         # by the heap
-        readdata -= (-oppgapsize)
+        readdata -= gapsize
         if readdata > 0:
             density = chainpayload / float(readdata)
         else: