mercurial/revlogutils/randomaccessfile.py
changeset 47425 e0a314bcbc9d
child 47463 5fa083a5ff04
equal deleted inserted replaced
47424:f77404040776 47425:e0a314bcbc9d
       
     1 # Copyright Mercurial Contributors
       
     2 #
       
     3 # This software may be used and distributed according to the terms of the
       
     4 # GNU General Public License version 2 or any later version.
       
     5 
       
     6 import contextlib
       
     7 
       
     8 from ..i18n import _
       
     9 from .. import (
       
    10     error,
       
    11     util,
       
    12 )
       
    13 
       
    14 
       
    15 _MAX_CACHED_CHUNK_SIZE = 1048576  # 1 MiB
       
    16 
       
    17 PARTIAL_READ_MSG = _(
       
    18     b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
       
    19 )
       
    20 
       
    21 
       
    22 def _is_power_of_two(n):
       
    23     return (n & (n - 1) == 0) and n != 0
       
    24 
       
    25 
       
    26 class randomaccessfile(object):
       
    27     """Accessing arbitrary chuncks of data within a file, with some caching"""
       
    28 
       
    29     def __init__(
       
    30         self,
       
    31         opener,
       
    32         filename,
       
    33         default_cached_chunk_size,
       
    34         initial_cache=None,
       
    35     ):
       
    36         # Required by bitwise manipulation below
       
    37         assert _is_power_of_two(default_cached_chunk_size)
       
    38 
       
    39         self.opener = opener
       
    40         self.filename = filename
       
    41         self.default_cached_chunk_size = default_cached_chunk_size
       
    42         self.writing_handle = None  # This is set from revlog.py
       
    43         self._cached_chunk = b''
       
    44         self._cached_chunk_position = 0  # Offset from the start of the file
       
    45         if initial_cache:
       
    46             self._cached_chunk_position, self._cached_chunk = initial_cache
       
    47 
       
    48     def clear_cache(self):
       
    49         self._cached_chunk = b''
       
    50         self._cached_chunk_position = 0
       
    51 
       
    52     def _open(self, mode=b'r'):
       
    53         """Return a file object"""
       
    54         return self.opener(self.filename, mode=mode)
       
    55 
       
    56     @contextlib.contextmanager
       
    57     def _open_read(self, existing_file_obj=None):
       
    58         """File object suitable for reading data"""
       
    59         # Use explicit file handle, if given.
       
    60         if existing_file_obj is not None:
       
    61             yield existing_file_obj
       
    62 
       
    63         # Use a file handle being actively used for writes, if available.
       
    64         # There is some danger to doing this because reads will seek the
       
    65         # file. However, revlog._writeentry performs a SEEK_END before all
       
    66         # writes, so we should be safe.
       
    67         elif self.writing_handle:
       
    68             yield self.writing_handle
       
    69 
       
    70         # Otherwise open a new file handle.
       
    71         else:
       
    72             with self._open() as fp:
       
    73                 yield fp
       
    74 
       
    75     def read_chunk(self, offset, length, existing_file_obj=None):
       
    76         """Read a chunk of bytes from the file.
       
    77 
       
    78         Accepts an absolute offset, length to read, and an optional existing
       
    79         file handle to read from.
       
    80 
       
    81         If an existing file handle is passed, it will be seeked and the
       
    82         original seek position will NOT be restored.
       
    83 
       
    84         Returns a str or buffer of raw byte data.
       
    85 
       
    86         Raises if the requested number of bytes could not be read.
       
    87         """
       
    88         end = offset + length
       
    89         cache_start = self._cached_chunk_position
       
    90         cache_end = cache_start + len(self._cached_chunk)
       
    91         # Is the requested chunk within the cache?
       
    92         if cache_start <= offset and end <= cache_end:
       
    93             if cache_start == offset and end == cache_end:
       
    94                 return self._cached_chunk  # avoid a copy
       
    95             relative_start = offset - cache_start
       
    96             return util.buffer(self._cached_chunk, relative_start, length)
       
    97 
       
    98         return self._read_and_update_cache(offset, length, existing_file_obj)
       
    99 
       
   100     def _read_and_update_cache(self, offset, length, existing_file_obj=None):
       
   101         # Cache data both forward and backward around the requested
       
   102         # data, in a fixed size window. This helps speed up operations
       
   103         # involving reading the revlog backwards.
       
   104         real_offset = offset & ~(self.default_cached_chunk_size - 1)
       
   105         real_length = (
       
   106             (offset + length + self.default_cached_chunk_size)
       
   107             & ~(self.default_cached_chunk_size - 1)
       
   108         ) - real_offset
       
   109         with self._open_read(existing_file_obj) as file_obj:
       
   110             file_obj.seek(real_offset)
       
   111             data = file_obj.read(real_length)
       
   112 
       
   113         self._add_cached_chunk(real_offset, data)
       
   114 
       
   115         relative_offset = offset - real_offset
       
   116         got = len(data) - relative_offset
       
   117         if got < length:
       
   118             message = PARTIAL_READ_MSG % (self.filename, length, offset, got)
       
   119             raise error.RevlogError(message)
       
   120 
       
   121         if offset != real_offset or real_length != length:
       
   122             return util.buffer(data, relative_offset, length)
       
   123         return data
       
   124 
       
   125     def _add_cached_chunk(self, offset, data):
       
   126         """Add to or replace the cached data chunk.
       
   127 
       
   128         Accepts an absolute offset and the data that is at that location.
       
   129         """
       
   130         if (
       
   131             self._cached_chunk_position + len(self._cached_chunk) == offset
       
   132             and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE
       
   133         ):
       
   134             # add to existing cache
       
   135             self._cached_chunk += data
       
   136         else:
       
   137             self._cached_chunk = data
       
   138             self._cached_chunk_position = offset