mercurial/revlog.py
changeset 51108 0250e45040f1
parent 51107 c2d2e5b65def
child 51118 98910135a3bc
equal deleted inserted replaced
51107:c2d2e5b65def 51108:0250e45040f1
   293     # how much data is large
   293     # how much data is large
   294     mmap_index_threshold = attr.ib(default=None)
   294     mmap_index_threshold = attr.ib(default=None)
   295     # How much data to read and cache into the raw revlog data cache.
   295     # How much data to read and cache into the raw revlog data cache.
   296     chunk_cache_size = attr.ib(default=65536)
   296     chunk_cache_size = attr.ib(default=65536)
   297 
   297 
       
   298     # The size of the uncompressed cache compared to the largest revision seen.
       
   299     uncompressed_cache_factor = attr.ib(default=None)
       
   300 
       
   301     # The number of chunks cached
       
   302     uncompressed_cache_count = attr.ib(default=None)
       
   303 
   298     # Allow sparse reading of the revlog data
   304     # Allow sparse reading of the revlog data
   299     with_sparse_read = attr.ib(default=False)
   305     with_sparse_read = attr.ib(default=False)
   300     # minimal density of a sparse read chunk
   306     # minimal density of a sparse read chunk
   301     sr_density_threshold = attr.ib(default=0.50)
   307     sr_density_threshold = attr.ib(default=0.50)
   302     # minimal size of data we skip when performing sparse read
   308     # minimal size of data we skip when performing sparse read
   394         # revlog header -> revlog compressor
   400         # revlog header -> revlog compressor
   395         self._decompressors = {}
   401         self._decompressors = {}
   396         # 3-tuple of (node, rev, text) for a raw revision.
   402         # 3-tuple of (node, rev, text) for a raw revision.
   397         self._revisioncache = None
   403         self._revisioncache = None
   398 
   404 
       
   405         # cache some uncompressed chunks
       
   406         # rev → uncompressed_chunk
       
   407         #
       
   408         # the max cost is dynamically updated to be proportional to the
       
   409         # size of revision we actually encounter.
       
   410         self._uncompressed_chunk_cache = None
       
   411         if self.data_config.uncompressed_cache_factor is not None:
       
   412             self._uncompressed_chunk_cache = util.lrucachedict(
       
   413                 self.data_config.uncompressed_cache_count,
       
   414                 maxcost=65536,  # some arbitrary initial value
       
   415             )
       
   416 
   399         self._delay_buffer = None
   417         self._delay_buffer = None
   400 
   418 
   401     @property
   419     @property
   402     def index_file(self):
   420     def index_file(self):
   403         return self.__index_file
   421         return self.__index_file
   412         return len(self.index)
   430         return len(self.index)
   413 
   431 
   414     def clear_cache(self):
   432     def clear_cache(self):
   415         assert not self.is_delaying
   433         assert not self.is_delaying
   416         self._revisioncache = None
   434         self._revisioncache = None
       
   435         if self._uncompressed_chunk_cache is not None:
       
   436             self._uncompressed_chunk_cache.clear()
   417         self._segmentfile.clear_cache()
   437         self._segmentfile.clear_cache()
   418         self._segmentfile_sidedata.clear_cache()
   438         self._segmentfile_sidedata.clear_cache()
   419 
   439 
   420     @property
   440     @property
   421     def canonical_index_file(self):
   441     def canonical_index_file(self):
   863         to be used for reading. If used, the seek position of the file will not
   883         to be used for reading. If used, the seek position of the file will not
   864         be preserved.
   884         be preserved.
   865 
   885 
   866         Returns a str holding uncompressed data for the requested revision.
   886         Returns a str holding uncompressed data for the requested revision.
   867         """
   887         """
       
   888         if self._uncompressed_chunk_cache is not None:
       
   889             uncomp = self._uncompressed_chunk_cache.get(rev)
       
   890             if uncomp is not None:
       
   891                 return uncomp
       
   892 
   868         compression_mode = self.index[rev][10]
   893         compression_mode = self.index[rev][10]
   869         data = self.get_segment_for_revs(rev, rev)[1]
   894         data = self.get_segment_for_revs(rev, rev)[1]
   870         if compression_mode == COMP_MODE_PLAIN:
   895         if compression_mode == COMP_MODE_PLAIN:
   871             return data
   896             uncomp = data
   872         elif compression_mode == COMP_MODE_DEFAULT:
   897         elif compression_mode == COMP_MODE_DEFAULT:
   873             return self._decompressor(data)
   898             uncomp = self._decompressor(data)
   874         elif compression_mode == COMP_MODE_INLINE:
   899         elif compression_mode == COMP_MODE_INLINE:
   875             return self.decompress(data)
   900             uncomp = self.decompress(data)
   876         else:
   901         else:
   877             msg = b'unknown compression mode %d'
   902             msg = b'unknown compression mode %d'
   878             msg %= compression_mode
   903             msg %= compression_mode
   879             raise error.RevlogError(msg)
   904             raise error.RevlogError(msg)
       
   905         if self._uncompressed_chunk_cache is not None:
       
   906             self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
       
   907         return uncomp
   880 
   908 
   881     def _chunks(self, revs, targetsize=None):
   909     def _chunks(self, revs, targetsize=None):
   882         """Obtain decompressed chunks for the specified revisions.
   910         """Obtain decompressed chunks for the specified revisions.
   883 
   911 
   884         Accepts an iterable of numeric revisions that are assumed to be in
   912         Accepts an iterable of numeric revisions that are assumed to be in
   897         length = self.length
   925         length = self.length
   898         inline = self.inline
   926         inline = self.inline
   899         iosize = self.index.entry_size
   927         iosize = self.index.entry_size
   900         buffer = util.buffer
   928         buffer = util.buffer
   901 
   929 
   902         l = []
   930         fetched_revs = []
   903         ladd = l.append
   931         fadd = fetched_revs.append
       
   932 
   904         chunks = []
   933         chunks = []
   905         ladd = chunks.append
   934         ladd = chunks.append
   906 
   935 
   907         if not self.data_config.with_sparse_read:
   936         if self._uncompressed_chunk_cache is None:
   908             slicedchunks = (revs,)
   937             fetched_revs = revs
       
   938         else:
       
   939             for rev in revs:
       
   940                 cached_value = self._uncompressed_chunk_cache.get(rev)
       
   941                 if cached_value is None:
       
   942                     fadd(rev)
       
   943                 else:
       
   944                     ladd((rev, cached_value))
       
   945 
       
   946         if not fetched_revs:
       
   947             slicedchunks = ()
       
   948         elif not self.data_config.with_sparse_read:
       
   949             slicedchunks = (fetched_revs,)
   909         else:
   950         else:
   910             slicedchunks = deltautil.slicechunk(
   951             slicedchunks = deltautil.slicechunk(
   911                 self,
   952                 self,
   912                 revs,
   953                 fetched_revs,
   913                 targetsize=targetsize,
   954                 targetsize=targetsize,
   914             )
   955             )
   915 
   956 
   916         for revschunk in slicedchunks:
   957         for revschunk in slicedchunks:
   917             firstrev = revschunk[0]
   958             firstrev = revschunk[0]
   947                 else:
   988                 else:
   948                     msg = b'unknown compression mode %d'
   989                     msg = b'unknown compression mode %d'
   949                     msg %= comp_mode
   990                     msg %= comp_mode
   950                     raise error.RevlogError(msg)
   991                     raise error.RevlogError(msg)
   951                 ladd((rev, c))
   992                 ladd((rev, c))
   952 
   993                 if self._uncompressed_chunk_cache is not None:
       
   994                     self._uncompressed_chunk_cache.insert(rev, c, len(c))
       
   995 
       
   996         chunks.sort()
   953         return [x[1] for x in chunks]
   997         return [x[1] for x in chunks]
   954 
   998 
   955     def raw_text(self, node, rev):
   999     def raw_text(self, node, rev):
   956         """return the possibly unvalidated rawtext for a revision
  1000         """return the possibly unvalidated rawtext for a revision
   957 
  1001 
   978 
  1022 
   979         targetsize = None
  1023         targetsize = None
   980         rawsize = self.index[rev][2]
  1024         rawsize = self.index[rev][2]
   981         if 0 <= rawsize:
  1025         if 0 <= rawsize:
   982             targetsize = 4 * rawsize
  1026             targetsize = 4 * rawsize
       
  1027 
       
  1028         if self._uncompressed_chunk_cache is not None:
       
  1029             # dynamically update the uncompressed_chunk_cache size to the
       
  1030             # largest revision we saw in this revlog.
       
  1031             factor = self.data_config.uncompressed_cache_factor
       
  1032             candidate_size = rawsize * factor
       
  1033             if candidate_size > self._uncompressed_chunk_cache.maxcost:
       
  1034                 self._uncompressed_chunk_cache.maxcost = candidate_size
   983 
  1035 
   984         bins = self._chunks(chain, targetsize=targetsize)
  1036         bins = self._chunks(chain, targetsize=targetsize)
   985         if basetext is None:
  1037         if basetext is None:
   986             basetext = bytes(bins[0])
  1038             basetext = bytes(bins[0])
   987             bins = bins[1:]
  1039             bins = bins[1:]