mercurial/revlog.py
changeset 38718 f8762ea73e0d
parent 38717 aa21a9ad46ea
child 38736 93777d16a25d
equal deleted inserted replaced
38717:aa21a9ad46ea 38718:f8762ea73e0d
   214         return self._data[rev]
   214         return self._data[rev]
   215 
   215 
   216     def length(self, rev):
   216     def length(self, rev):
   217         return self.end(rev) - self.start(rev)
   217         return self.end(rev) - self.start(rev)
   218 
   218 
       
   219     def __len__(self):
       
   220         return len(self._data)
       
   221 
   219 def _trimchunk(revlog, revs, startidx, endidx=None):
   222 def _trimchunk(revlog, revs, startidx, endidx=None):
   220     """returns revs[startidx:endidx] without empty trailing revs
   223     """returns revs[startidx:endidx] without empty trailing revs
   221 
   224 
   222     Doctest Setup
   225     Doctest Setup
   223     >>> revlog = _testrevlog([
   226     >>> revlog = _testrevlog([
   291     """
   294     """
   292     if not revs:
   295     if not revs:
   293         return 0
   296         return 0
   294     return revlog.end(revs[-1]) - revlog.start(revs[0])
   297     return revlog.end(revs[-1]) - revlog.start(revs[0])
   295 
   298 
   296 def _slicechunk(revlog, revs, targetsize=None):
   299 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
   297     """slice revs to reduce the amount of unrelated data to be read from disk.
   300     """slice revs to reduce the amount of unrelated data to be read from disk.
   298 
   301 
   299     ``revs`` is sliced into groups that should be read in one time.
   302     ``revs`` is sliced into groups that should be read in one time.
   300     Assume that revs are sorted.
   303     Assume that revs are sorted.
   301 
   304 
   339     [[0], [11, 13, 15]]
   342     [[0], [11, 13, 15]]
   340     >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
   343     >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
   341     [[1, 2], [5, 8, 10, 11], [14]]
   344     [[1, 2], [5, 8, 10, 11], [14]]
   342 
   345 
   343     Slicing with a maximum chunk size
   346     Slicing with a maximum chunk size
   344     >>> list(_slicechunk(revlog, [0, 11, 13, 15], 15))
   347     >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
   345     [[0], [11], [13], [15]]
   348     [[0], [11], [13], [15]]
   346     >>> list(_slicechunk(revlog, [0, 11, 13, 15], 20))
   349     >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
   347     [[0], [11], [13, 15]]
   350     [[0], [11], [13, 15]]
   348     """
   351     """
   349     if targetsize is not None:
   352     if targetsize is not None:
   350         targetsize = max(targetsize, revlog._srmingapsize)
   353         targetsize = max(targetsize, revlog._srmingapsize)
       
   354     # targetsize should not be specified when evaluating delta candidates:
       
   355     # * targetsize is used to ensure we stay within specification when reading,
       
   356     # * deltainfo is used to pick are good delta chain when writing.
       
   357     if not (deltainfo is None or targetsize is None):
       
   358         msg = 'cannot use `targetsize` with a `deltainfo`'
       
   359         raise error.ProgrammingError(msg)
   351     for chunk in _slicechunktodensity(revlog, revs,
   360     for chunk in _slicechunktodensity(revlog, revs,
       
   361                                       deltainfo,
   352                                       revlog._srdensitythreshold,
   362                                       revlog._srdensitythreshold,
   353                                       revlog._srmingapsize):
   363                                       revlog._srmingapsize):
   354         for subchunk in _slicechunktosize(revlog, chunk, targetsize):
   364         for subchunk in _slicechunktosize(revlog, chunk, targetsize):
   355             yield subchunk
   365             yield subchunk
   356 
   366 
   357 def _slicechunktosize(revlog, revs, targetsize):
   367 def _slicechunktosize(revlog, revs, targetsize=None):
   358     """slice revs to match the target size
   368     """slice revs to match the target size
   359 
   369 
   360     This is intended to be used on chunk that density slicing selected by that
   370     This is intended to be used on chunk that density slicing selected by that
   361     are still too large compared to the read garantee of revlog. This might
   371     are still too large compared to the read garantee of revlog. This might
   362     happens when "minimal gap size" interrupted the slicing or when chain are
   372     happens when "minimal gap size" interrupted the slicing or when chain are
   429             startrevidx = idx
   439             startrevidx = idx
   430             startdata = revlog.start(r)
   440             startdata = revlog.start(r)
   431             endrevidx = idx
   441             endrevidx = idx
   432     yield _trimchunk(revlog, revs, startrevidx)
   442     yield _trimchunk(revlog, revs, startrevidx)
   433 
   443 
   434 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
   444 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
       
   445                          mingapsize=0):
   435     """slice revs to reduce the amount of unrelated data to be read from disk.
   446     """slice revs to reduce the amount of unrelated data to be read from disk.
   436 
   447 
   437     ``revs`` is sliced into groups that should be read in one time.
   448     ``revs`` is sliced into groups that should be read in one time.
   438     Assume that revs are sorted.
   449     Assume that revs are sorted.
       
   450 
       
   451     ``deltainfo`` is a _deltainfo instance of a revision that we would append
       
   452     to the top of the revlog.
   439 
   453 
   440     The initial chunk is sliced until the overall density (payload/chunks-span
   454     The initial chunk is sliced until the overall density (payload/chunks-span
   441     ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
   455     ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
   442     skipped.
   456     skipped.
   443 
   457 
   485 
   499 
   486     if len(revs) <= 1:
   500     if len(revs) <= 1:
   487         yield revs
   501         yield revs
   488         return
   502         return
   489 
   503 
   490     readdata = deltachainspan = _segmentspan(revlog, revs)
   504     nextrev = len(revlog)
       
   505     nextoffset = revlog.end(nextrev - 1)
       
   506 
       
   507     if deltainfo is None:
       
   508         deltachainspan = _segmentspan(revlog, revs)
       
   509         chainpayload = sum(length(r) for r in revs)
       
   510     else:
       
   511         deltachainspan = deltainfo.distance
       
   512         chainpayload = deltainfo.compresseddeltalen
   491 
   513 
   492     if deltachainspan < mingapsize:
   514     if deltachainspan < mingapsize:
   493         yield revs
   515         yield revs
   494         return
   516         return
   495 
   517 
   496     chainpayload = sum(length(r) for r in revs)
   518     readdata = deltachainspan
   497 
   519 
   498     if deltachainspan:
   520     if deltachainspan:
   499         density = chainpayload / float(deltachainspan)
   521         density = chainpayload / float(deltachainspan)
   500     else:
   522     else:
   501         density = 1.0
   523         density = 1.0
   502 
   524 
   503     if density >= targetdensity:
   525     if density >= targetdensity:
   504         yield revs
   526         yield revs
   505         return
   527         return
       
   528 
       
   529     if deltainfo is not None:
       
   530         revs = list(revs)
       
   531         revs.append(nextrev)
   506 
   532 
   507     # Store the gaps in a heap to have them sorted by decreasing size
   533     # Store the gaps in a heap to have them sorted by decreasing size
   508     gapsheap = []
   534     gapsheap = []
   509     heapq.heapify(gapsheap)
   535     heapq.heapify(gapsheap)
   510     prevend = None
   536     prevend = None
   511     for i, rev in enumerate(revs):
   537     for i, rev in enumerate(revs):
   512         revstart = start(rev)
   538         if rev < nextrev:
   513         revlen = length(rev)
   539             revstart = start(rev)
       
   540             revlen = length(rev)
       
   541         else:
       
   542             revstart = nextoffset
       
   543             revlen = deltainfo.deltalen
   514 
   544 
   515         # Skip empty revisions to form larger holes
   545         # Skip empty revisions to form larger holes
   516         if revlen == 0:
   546         if revlen == 0:
   517             continue
   547             continue
   518 
   548 
  1987         ladd = l.append
  2017         ladd = l.append
  1988 
  2018 
  1989         if not self._withsparseread:
  2019         if not self._withsparseread:
  1990             slicedchunks = (revs,)
  2020             slicedchunks = (revs,)
  1991         else:
  2021         else:
  1992             slicedchunks = _slicechunk(self, revs, targetsize)
  2022             slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
  1993 
  2023 
  1994         for revschunk in slicedchunks:
  2024         for revschunk in slicedchunks:
  1995             firstrev = revschunk[0]
  2025             firstrev = revschunk[0]
  1996             # Skip trailing revisions with empty diff
  2026             # Skip trailing revisions with empty diff
  1997             for lastrev in revschunk[::-1]:
  2027             for lastrev in revschunk[::-1]:
  2400         #   bounding it limits the amount of I/O we need to do.
  2430         #   bounding it limits the amount of I/O we need to do.
  2401         # - 'deltainfo.compresseddeltalen' is the sum of the total size of
  2431         # - 'deltainfo.compresseddeltalen' is the sum of the total size of
  2402         #   deltas we need to apply -- bounding it limits the amount of CPU
  2432         #   deltas we need to apply -- bounding it limits the amount of CPU
  2403         #   we consume.
  2433         #   we consume.
  2404 
  2434 
  2405         distance = deltainfo.distance
  2435         if self._sparserevlog:
       
  2436             # As sparse-read will be used, we can consider that the distance,
       
  2437             # instead of being the span of the whole chunk,
       
  2438             # is the span of the largest read chunk
       
  2439             base = deltainfo.base
       
  2440 
       
  2441             if base != nullrev:
       
  2442                 deltachain = self._deltachain(base)[0]
       
  2443             else:
       
  2444                 deltachain = []
       
  2445 
       
  2446             chunks = _slicechunk(self, deltachain, deltainfo)
       
  2447             distance = max(map(lambda revs:_segmentspan(self, revs), chunks))
       
  2448         else:
       
  2449             distance = deltainfo.distance
       
  2450 
  2406         textlen = revinfo.textlen
  2451         textlen = revinfo.textlen
  2407         defaultmax = textlen * 4
  2452         defaultmax = textlen * 4
  2408         maxdist = self._maxdeltachainspan
  2453         maxdist = self._maxdeltachainspan
  2409         if not maxdist:
  2454         if not maxdist:
  2410             maxdist = distance # ensure the conditional pass
  2455             maxdist = distance # ensure the conditional pass
  2411         maxdist = max(maxdist, defaultmax)
  2456         maxdist = max(maxdist, defaultmax)
       
  2457         if self._sparserevlog and maxdist < self._srmingapsize:
       
  2458             # In multiple place, we are ignoring irrelevant data range below a
       
  2459             # certain size. Be also apply this tradeoff here and relax span
       
  2460             # constraint for small enought content.
       
  2461             maxdist = self._srmingapsize
  2412         if (distance > maxdist or deltainfo.deltalen > textlen or
  2462         if (distance > maxdist or deltainfo.deltalen > textlen or
  2413             deltainfo.compresseddeltalen > textlen * 2 or
  2463             deltainfo.compresseddeltalen > textlen * 2 or
  2414             (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
  2464             (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
  2415             return False
  2465             return False
  2416 
  2466