# Copyright Mercurial Contributors
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import contextlib

from ..i18n import _
from .. import (
    error,
    util,
)

# Size limit for growing the cached chunk: `_add_cached_chunk` only appends
# to the existing cache while the combined size stays below this value.
_MAX_CACHED_CHUNK_SIZE = 1048576  # 1 MiB

# Format arguments, in order: file name, expected byte count, offset, and the
# number of bytes actually obtained.
PARTIAL_READ_MSG = _(
    b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
)
22 def _is_power_of_two(n): |
|
23 return (n & (n - 1) == 0) and n != 0 |
|
24 |
|
25 |
|
class randomaccessfile(object):
    """Accessing arbitrary chunks of data within a file, with some caching"""

    def __init__(
        self,
        opener,
        filename,
        default_cached_chunk_size,
        initial_cache=None,
    ):
        """Set up cached random access to `filename`.

        `opener` is called as ``opener(filename, mode=...)`` to obtain a file
        object. `default_cached_chunk_size` is the alignment and granularity
        of the read window and must be a power of two. `initial_cache`, when
        truthy, is an ``(offset, data)`` pair used to pre-populate the cache.
        """
        # The read window is computed by masking with (size - 1), which is
        # only correct when the size is a power of two.
        assert _is_power_of_two(default_cached_chunk_size)

        self.opener = opener
        self.filename = filename
        self.default_cached_chunk_size = default_cached_chunk_size
        self.writing_handle = None  # This is set from revlog.py
        self._cached_chunk = b''
        self._cached_chunk_position = 0  # Offset from the start of the file
        if initial_cache:
            self._cached_chunk_position, self._cached_chunk = initial_cache

    def clear_cache(self):
        """Drop the cached data and reset its position to the file start."""
        self._cached_chunk = b''
        self._cached_chunk_position = 0

    def _open(self, mode=b'r'):
        """Return a file object"""
        return self.opener(self.filename, mode=mode)

    @contextlib.contextmanager
    def _open_read(self, existing_file_obj=None):
        """File object suitable for reading data

        Only a handle opened here is closed on exit; a handle supplied by the
        caller or borrowed from `writing_handle` is left open.
        """
        # Use explicit file handle, if given.
        if existing_file_obj is not None:
            yield existing_file_obj

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, revlog._writeentry performs a SEEK_END before all
        # writes, so we should be safe.
        elif self.writing_handle:
            yield self.writing_handle

        # Otherwise open a new file handle.
        else:
            with self._open() as fp:
                yield fp

    def read_chunk(self, offset, length, existing_file_obj=None):
        """Read a chunk of bytes from the file.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        end = offset + length
        cache_start = self._cached_chunk_position
        cache_end = cache_start + len(self._cached_chunk)
        # Is the requested chunk within the cache?
        if cache_start <= offset and end <= cache_end:
            if cache_start == offset and end == cache_end:
                return self._cached_chunk  # avoid a copy
            relative_start = offset - cache_start
            return util.buffer(self._cached_chunk, relative_start, length)

        return self._read_and_update_cache(offset, length, existing_file_obj)

    def _read_and_update_cache(self, offset, length, existing_file_obj=None):
        """Read an aligned window around the request and cache it.

        Returns the `length` bytes found at `offset`. Raises
        `error.RevlogError` when fewer than `length` bytes are available at
        `offset`; the data that was read is still handed to the cache first.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        window = self.default_cached_chunk_size
        real_offset = offset & ~(window - 1)
        real_length = ((offset + length + window) & ~(window - 1)) - real_offset
        with self._open_read(existing_file_obj) as file_obj:
            file_obj.seek(real_offset)
            data = file_obj.read(real_length)

        self._add_cached_chunk(real_offset, data)

        relative_offset = offset - real_offset
        available = len(data) - relative_offset
        if available < length:
            # `available` is negative when the file ends before `offset`;
            # report that as zero bytes rather than a negative count.
            got = max(available, 0)
            message = PARTIAL_READ_MSG % (self.filename, length, offset, got)
            raise error.RevlogError(message)

        if offset != real_offset or real_length != length:
            return util.buffer(data, relative_offset, length)
        return data

    def _add_cached_chunk(self, offset, data):
        """Add to or replace the cached data chunk.

        Accepts an absolute offset and the data that is at that location.

        The data is appended when it starts exactly where the current cache
        ends and the combined size stays below `_MAX_CACHED_CHUNK_SIZE`;
        otherwise it replaces the cache.
        """
        if (
            self._cached_chunk_position + len(self._cached_chunk) == offset
            and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE
        ):
            # add to existing cache
            self._cached_chunk += data
        else:
            self._cached_chunk = data
            self._cached_chunk_position = offset