mercurial: comparison mercurial/revlog.py

equal deleted inserted replaced

-:aa21a9ad46ea
+:f8762ea73e0d
 return self._data[rev]
 def length(self, rev):
 return self.end(rev) - self.start(rev)
+def __len__(self):
+return len(self._data)
 def _trimchunk(revlog, revs, startidx, endidx=None):
 """returns revs[startidx:endidx] without empty trailing revs
 Doctest Setup
 >>> revlog = _testrevlog([
 """
 if not revs:
 return 0
 return revlog.end(revs[-1]) - revlog.start(revs[0])
-def _slicechunk(revlog, revs, targetsize=None):
+def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
 """slice revs to reduce the amount of unrelated data to be read from disk.
 ``revs`` is sliced into groups that should be read in one time.
 Assume that revs are sorted.
 [[0], [11, 13, 15]]
 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
 [[1, 2], [5, 8, 10, 11], [14]]
 Slicing with a maximum chunk size
->>> list(_slicechunk(revlog, [0, 11, 13, 15], 15))
+>>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
 [[0], [11], [13], [15]]
->>> list(_slicechunk(revlog, [0, 11, 13, 15], 20))
+>>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
 [[0], [11], [13, 15]]
 """
 if targetsize is not None:
 targetsize = max(targetsize, revlog._srmingapsize)
+# targetsize should not be specified when evaluating delta candidates:
+# * targetsize is used to ensure we stay within specification when reading,
+# * deltainfo is used to pick a good delta chain when writing.
+if not (deltainfo is None or targetsize is None):
+msg = 'cannot use `targetsize` with a `deltainfo`'
+raise error.ProgrammingError(msg)
 for chunk in _slicechunktodensity(revlog, revs,
+deltainfo,
 revlog._srdensitythreshold,
 revlog._srmingapsize):
 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
 yield subchunk
-def _slicechunktosize(revlog, revs, targetsize):
+def _slicechunktosize(revlog, revs, targetsize=None):
 """slice revs to match the target size
 This is intended to be used on chunks that density slicing selected but that
 are still too large compared to the read guarantee of revlog. This might
 happen when "minimal gap size" interrupted the slicing or when chains are
 startrevidx = idx
 startdata = revlog.start(r)
 endrevidx = idx
 yield _trimchunk(revlog, revs, startrevidx)
-def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
+def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
+mingapsize=0):
 """slice revs to reduce the amount of unrelated data to be read from disk.
 ``revs`` is sliced into groups that should be read in one time.
 Assume that revs are sorted.
+``deltainfo`` is a _deltainfo instance of a revision that we would append
+to the top of the revlog.
 The initial chunk is sliced until the overall density (payload/chunks-span
 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
 skipped.
 if len(revs) <= 1:
 yield revs
 return
-readdata = deltachainspan = _segmentspan(revlog, revs)
+nextrev = len(revlog)
+nextoffset = revlog.end(nextrev - 1)
+if deltainfo is None:
+deltachainspan = _segmentspan(revlog, revs)
+chainpayload = sum(length(r) for r in revs)
+else:
+deltachainspan = deltainfo.distance
+chainpayload = deltainfo.compresseddeltalen
 if deltachainspan < mingapsize:
 yield revs
 return
-chainpayload = sum(length(r) for r in revs)
+readdata = deltachainspan
 if deltachainspan:
 density = chainpayload / float(deltachainspan)
 else:
 density = 1.0
 if density >= targetdensity:
 yield revs
 return
+if deltainfo is not None:
+revs = list(revs)
+revs.append(nextrev)
 # Store the gaps in a heap to have them sorted by decreasing size
 gapsheap = []
 heapq.heapify(gapsheap)
 prevend = None
 for i, rev in enumerate(revs):
-revstart = start(rev)
+if rev < nextrev:
-revlen = length(rev)
+revstart = start(rev)
+revlen = length(rev)
+else:
+revstart = nextoffset
+revlen = deltainfo.deltalen
 # Skip empty revisions to form larger holes
 if revlen == 0:
 continue
 ladd = l.append
 if not self._withsparseread:
 slicedchunks = (revs,)
 else:
-slicedchunks = _slicechunk(self, revs, targetsize)
+slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
 for revschunk in slicedchunks:
 firstrev = revschunk[0]
 # Skip trailing revisions with empty diff
 for lastrev in revschunk[::-1]:
 #   bounding it limits the amount of I/O we need to do.
 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
 #   deltas we need to apply -- bounding it limits the amount of CPU
 #   we consume.
-distance = deltainfo.distance
+if self._sparserevlog:
+# As sparse-read will be used, we can consider that the distance,
+# instead of being the span of the whole chunk,
+# is the span of the largest read chunk
+base = deltainfo.base
+if base != nullrev:
+deltachain = self._deltachain(base)[0]
+else:
+deltachain = []
+chunks = _slicechunk(self, deltachain, deltainfo)
+distance = max(map(lambda revs:_segmentspan(self, revs), chunks))
+else:
+distance = deltainfo.distance
 textlen = revinfo.textlen
 defaultmax = textlen * 4
 maxdist = self._maxdeltachainspan
 if not maxdist:
 maxdist = distance # ensure the conditional pass
 maxdist = max(maxdist, defaultmax)
+if self._sparserevlog and maxdist < self._srmingapsize:
+# In multiple places, we ignore irrelevant data ranges below a
+# certain size. We also apply this tradeoff here and relax the span
+# constraint for small enough content.
+maxdist = self._srmingapsize
 if (distance > maxdist or deltainfo.deltalen > textlen or
 deltainfo.compresseddeltalen > textlen * 2 or
 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
 return False

changeset 38718	f8762ea73e0d
parent 38717	aa21a9ad46ea
child 38736	93777d16a25d