293 # how much data is large |
293 # how much data is large |
294 mmap_index_threshold = attr.ib(default=None) |
294 mmap_index_threshold = attr.ib(default=None) |
295 # How much data to read and cache into the raw revlog data cache. |
295 # How much data to read and cache into the raw revlog data cache. |
296 chunk_cache_size = attr.ib(default=65536) |
296 chunk_cache_size = attr.ib(default=65536) |
297 |
297 |
|
298 # The size of the uncompressed cache compared to the largest revision seen. |
|
299 uncompressed_cache_factor = attr.ib(default=None) |
|
300 |
|
301 # The number of chunks cached |
|
302 uncompressed_cache_count = attr.ib(default=None) |
|
303 |
298 # Allow sparse reading of the revlog data |
304 # Allow sparse reading of the revlog data |
299 with_sparse_read = attr.ib(default=False) |
305 with_sparse_read = attr.ib(default=False) |
300 # minimal density of a sparse read chunk |
306 # minimal density of a sparse read chunk |
301 sr_density_threshold = attr.ib(default=0.50) |
307 sr_density_threshold = attr.ib(default=0.50) |
302 # minimal size of data we skip when performing sparse read |
308 # minimal size of data we skip when performing sparse read |
394 # revlog header -> revlog compressor |
400 # revlog header -> revlog compressor |
395 self._decompressors = {} |
401 self._decompressors = {} |
396 # 3-tuple of (node, rev, text) for a raw revision. |
402 # 3-tuple of (node, rev, text) for a raw revision. |
397 self._revisioncache = None |
403 self._revisioncache = None |
398 |
404 |
|
405 # cache some uncompressed chunks |
|
406 # rev → uncompressed_chunk |
|
407 # |
|
408 # the max cost is dynamically updated to be proportional to the |
|
409 # size of revision we actually encounter. |
|
410 self._uncompressed_chunk_cache = None |
|
411 if self.data_config.uncompressed_cache_factor is not None: |
|
412 self._uncompressed_chunk_cache = util.lrucachedict( |
|
413 self.data_config.uncompressed_cache_count, |
|
414 maxcost=65536, # some arbitrary initial value |
|
415 ) |
|
416 |
399 self._delay_buffer = None |
417 self._delay_buffer = None |
400 |
418 |
401 @property |
419 @property |
402 def index_file(self): |
420 def index_file(self): |
403 return self.__index_file |
421 return self.__index_file |
412 return len(self.index) |
430 return len(self.index) |
413 |
431 |
414 def clear_cache(self): |
432 def clear_cache(self): |
415 assert not self.is_delaying |
433 assert not self.is_delaying |
416 self._revisioncache = None |
434 self._revisioncache = None |
|
435 if self._uncompressed_chunk_cache is not None: |
|
436 self._uncompressed_chunk_cache.clear() |
417 self._segmentfile.clear_cache() |
437 self._segmentfile.clear_cache() |
418 self._segmentfile_sidedata.clear_cache() |
438 self._segmentfile_sidedata.clear_cache() |
419 |
439 |
420 @property |
440 @property |
421 def canonical_index_file(self): |
441 def canonical_index_file(self): |
863 to be used for reading. If used, the seek position of the file will not |
883 to be used for reading. If used, the seek position of the file will not |
864 be preserved. |
884 be preserved. |
865 |
885 |
866 Returns a str holding uncompressed data for the requested revision. |
886 Returns a str holding uncompressed data for the requested revision. |
867 """ |
887 """ |
|
888 if self._uncompressed_chunk_cache is not None: |
|
889 uncomp = self._uncompressed_chunk_cache.get(rev) |
|
890 if uncomp is not None: |
|
891 return uncomp |
|
892 |
868 compression_mode = self.index[rev][10] |
893 compression_mode = self.index[rev][10] |
869 data = self.get_segment_for_revs(rev, rev)[1] |
894 data = self.get_segment_for_revs(rev, rev)[1] |
870 if compression_mode == COMP_MODE_PLAIN: |
895 if compression_mode == COMP_MODE_PLAIN: |
871 return data |
896 uncomp = data |
872 elif compression_mode == COMP_MODE_DEFAULT: |
897 elif compression_mode == COMP_MODE_DEFAULT: |
873 return self._decompressor(data) |
898 uncomp = self._decompressor(data) |
874 elif compression_mode == COMP_MODE_INLINE: |
899 elif compression_mode == COMP_MODE_INLINE: |
875 return self.decompress(data) |
900 uncomp = self.decompress(data) |
876 else: |
901 else: |
877 msg = b'unknown compression mode %d' |
902 msg = b'unknown compression mode %d' |
878 msg %= compression_mode |
903 msg %= compression_mode |
879 raise error.RevlogError(msg) |
904 raise error.RevlogError(msg) |
|
905 if self._uncompressed_chunk_cache is not None: |
|
906 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp)) |
|
907 return uncomp |
880 |
908 |
881 def _chunks(self, revs, targetsize=None): |
909 def _chunks(self, revs, targetsize=None): |
882 """Obtain decompressed chunks for the specified revisions. |
910 """Obtain decompressed chunks for the specified revisions. |
883 |
911 |
884 Accepts an iterable of numeric revisions that are assumed to be in |
912 Accepts an iterable of numeric revisions that are assumed to be in |
897 length = self.length |
925 length = self.length |
898 inline = self.inline |
926 inline = self.inline |
899 iosize = self.index.entry_size |
927 iosize = self.index.entry_size |
900 buffer = util.buffer |
928 buffer = util.buffer |
901 |
929 |
902 l = [] |
930 fetched_revs = [] |
903 ladd = l.append |
931 fadd = fetched_revs.append |
|
932 |
904 chunks = [] |
933 chunks = [] |
905 ladd = chunks.append |
934 ladd = chunks.append |
906 |
935 |
907 if not self.data_config.with_sparse_read: |
936 if self._uncompressed_chunk_cache is None: |
908 slicedchunks = (revs,) |
937 fetched_revs = revs |
|
938 else: |
|
939 for rev in revs: |
|
940 cached_value = self._uncompressed_chunk_cache.get(rev) |
|
941 if cached_value is None: |
|
942 fadd(rev) |
|
943 else: |
|
944 ladd((rev, cached_value)) |
|
945 |
|
946 if not fetched_revs: |
|
947 slicedchunks = () |
|
948 elif not self.data_config.with_sparse_read: |
|
949 slicedchunks = (fetched_revs,) |
909 else: |
950 else: |
910 slicedchunks = deltautil.slicechunk( |
951 slicedchunks = deltautil.slicechunk( |
911 self, |
952 self, |
912 revs, |
953 fetched_revs, |
913 targetsize=targetsize, |
954 targetsize=targetsize, |
914 ) |
955 ) |
915 |
956 |
916 for revschunk in slicedchunks: |
957 for revschunk in slicedchunks: |
917 firstrev = revschunk[0] |
958 firstrev = revschunk[0] |
947 else: |
988 else: |
948 msg = b'unknown compression mode %d' |
989 msg = b'unknown compression mode %d' |
949 msg %= comp_mode |
990 msg %= comp_mode |
950 raise error.RevlogError(msg) |
991 raise error.RevlogError(msg) |
951 ladd((rev, c)) |
992 ladd((rev, c)) |
952 |
993 if self._uncompressed_chunk_cache is not None: |
|
994 self._uncompressed_chunk_cache.insert(rev, c, len(c)) |
|
995 |
|
996 chunks.sort() |
953 return [x[1] for x in chunks] |
997 return [x[1] for x in chunks] |
954 |
998 |
955 def raw_text(self, node, rev): |
999 def raw_text(self, node, rev): |
956 """return the possibly unvalidated rawtext for a revision |
1000 """return the possibly unvalidated rawtext for a revision |
957 |
1001 |
978 |
1022 |
979 targetsize = None |
1023 targetsize = None |
980 rawsize = self.index[rev][2] |
1024 rawsize = self.index[rev][2] |
981 if 0 <= rawsize: |
1025 if 0 <= rawsize: |
982 targetsize = 4 * rawsize |
1026 targetsize = 4 * rawsize |
|
1027 |
|
1028 if self._uncompressed_chunk_cache is not None: |
|
1029 # dynamically grow the uncompressed_chunk_cache max cost so it stays at |

1030 # least `factor` times the largest revision we saw in this revlog. |
|
1031 factor = self.data_config.uncompressed_cache_factor |
|
1032 candidate_size = rawsize * factor |
|
1033 if candidate_size > self._uncompressed_chunk_cache.maxcost: |
|
1034 self._uncompressed_chunk_cache.maxcost = candidate_size |
983 |
1035 |
984 bins = self._chunks(chain, targetsize=targetsize) |
1036 bins = self._chunks(chain, targetsize=targetsize) |
985 if basetext is None: |
1037 if basetext is None: |
986 basetext = bytes(bins[0]) |
1038 basetext = bytes(bins[0]) |
987 bins = bins[1:] |
1039 bins = bins[1:] |