24 |
25 |
25 # import stuff from node for others to import from revlog |
26 # import stuff from node for others to import from revlog |
26 from .node import ( |
27 from .node import ( |
27 bin, |
28 bin, |
28 hex, |
29 hex, |
29 nullhex, |
|
30 nullid, |
|
31 nullrev, |
30 nullrev, |
32 sha1nodeconstants, |
31 sha1nodeconstants, |
33 short, |
32 short, |
34 wdirfilenodeids, |
|
35 wdirhex, |
|
36 wdirid, |
|
37 wdirrev, |
33 wdirrev, |
38 ) |
34 ) |
39 from .i18n import _ |
35 from .i18n import _ |
40 from .pycompat import getattr |
36 from .pycompat import getattr |
41 from .revlogutils.constants import ( |
37 from .revlogutils.constants import ( |
|
38 ALL_KINDS, |
|
39 CHANGELOGV2, |
|
40 COMP_MODE_DEFAULT, |
|
41 COMP_MODE_INLINE, |
|
42 COMP_MODE_PLAIN, |
|
43 FEATURES_BY_VERSION, |
42 FLAG_GENERALDELTA, |
44 FLAG_GENERALDELTA, |
43 FLAG_INLINE_DATA, |
45 FLAG_INLINE_DATA, |
44 INDEX_ENTRY_V0, |
|
45 INDEX_ENTRY_V1, |
|
46 INDEX_ENTRY_V2, |
|
47 INDEX_HEADER, |
46 INDEX_HEADER, |
|
47 KIND_CHANGELOG, |
48 REVLOGV0, |
48 REVLOGV0, |
49 REVLOGV1, |
49 REVLOGV1, |
50 REVLOGV1_FLAGS, |
50 REVLOGV1_FLAGS, |
51 REVLOGV2, |
51 REVLOGV2, |
52 REVLOGV2_FLAGS, |
52 REVLOGV2_FLAGS, |
53 REVLOG_DEFAULT_FLAGS, |
53 REVLOG_DEFAULT_FLAGS, |
54 REVLOG_DEFAULT_FORMAT, |
54 REVLOG_DEFAULT_FORMAT, |
55 REVLOG_DEFAULT_VERSION, |
55 REVLOG_DEFAULT_VERSION, |
|
56 SUPPORTED_FLAGS, |
56 ) |
57 ) |
57 from .revlogutils.flagutil import ( |
58 from .revlogutils.flagutil import ( |
58 REVIDX_DEFAULT_FLAGS, |
59 REVIDX_DEFAULT_FLAGS, |
59 REVIDX_ELLIPSIS, |
60 REVIDX_ELLIPSIS, |
60 REVIDX_EXTSTORED, |
61 REVIDX_EXTSTORED, |
61 REVIDX_FLAGS_ORDER, |
62 REVIDX_FLAGS_ORDER, |
62 REVIDX_HASCOPIESINFO, |
63 REVIDX_HASCOPIESINFO, |
63 REVIDX_ISCENSORED, |
64 REVIDX_ISCENSORED, |
64 REVIDX_RAWTEXT_CHANGING_FLAGS, |
65 REVIDX_RAWTEXT_CHANGING_FLAGS, |
65 REVIDX_SIDEDATA, |
|
66 ) |
66 ) |
67 from .thirdparty import attr |
67 from .thirdparty import attr |
68 from . import ( |
68 from . import ( |
69 ancestor, |
69 ancestor, |
70 dagop, |
70 dagop, |
71 error, |
71 error, |
72 mdiff, |
72 mdiff, |
73 policy, |
73 policy, |
74 pycompat, |
74 pycompat, |
|
75 revlogutils, |
75 templatefilters, |
76 templatefilters, |
76 util, |
77 util, |
77 ) |
78 ) |
78 from .interfaces import ( |
79 from .interfaces import ( |
79 repository, |
80 repository, |
80 util as interfaceutil, |
81 util as interfaceutil, |
81 ) |
82 ) |
82 from .revlogutils import ( |
83 from .revlogutils import ( |
83 deltas as deltautil, |
84 deltas as deltautil, |
|
85 docket as docketutil, |
84 flagutil, |
86 flagutil, |
85 nodemap as nodemaputil, |
87 nodemap as nodemaputil, |
|
88 randomaccessfile, |
|
89 revlogv0, |
|
90 rewrite, |
86 sidedata as sidedatautil, |
91 sidedata as sidedatautil, |
87 ) |
92 ) |
88 from .utils import ( |
93 from .utils import ( |
89 storageutil, |
94 storageutil, |
90 stringutil, |
95 stringutil, |
91 ) |
96 ) |
92 |
97 |
93 # blanked usage of all the name to prevent pyflakes constraints |
98 # blanked usage of all the name to prevent pyflakes constraints |
94 # We need these name available in the module for extensions. |
99 # We need these name available in the module for extensions. |
|
100 |
95 REVLOGV0 |
101 REVLOGV0 |
96 REVLOGV1 |
102 REVLOGV1 |
97 REVLOGV2 |
103 REVLOGV2 |
98 FLAG_INLINE_DATA |
104 FLAG_INLINE_DATA |
99 FLAG_GENERALDELTA |
105 FLAG_GENERALDELTA |
219 warning = attr.ib(default=None) |
189 warning = attr.ib(default=None) |
220 error = attr.ib(default=None) |
190 error = attr.ib(default=None) |
221 node = attr.ib(default=None) |
191 node = attr.ib(default=None) |
222 |
192 |
223 |
193 |
224 class revlogoldindex(list): |
194 def parse_index_v1(data, inline): |
225 entry_size = INDEX_ENTRY_V0.size |
195 # call the C implementation to parse the index data |
226 |
196 index, cache = parsers.parse_index2(data, inline) |
227 @property |
197 return index, cache |
228 def nodemap(self): |
198 |
229 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]" |
199 |
230 util.nouideprecwarn(msg, b'5.3', stacklevel=2) |
200 def parse_index_v2(data, inline): |
231 return self._nodemap |
201 # call the C implementation to parse the index data |
232 |
202 index, cache = parsers.parse_index2(data, inline, revlogv2=True) |
233 @util.propertycache |
203 return index, cache |
234 def _nodemap(self): |
204 |
235 nodemap = nodemaputil.NodeMap({nullid: nullrev}) |
205 |
236 for r in range(0, len(self)): |
206 def parse_index_cl_v2(data, inline): |
237 n = self[r][7] |
207 # call the C implementation to parse the index data |
238 nodemap[n] = r |
208 assert not inline |
239 return nodemap |
209 from .pure.parsers import parse_index_cl_v2 |
240 |
210 |
241 def has_node(self, node): |
211 index, cache = parse_index_cl_v2(data) |
242 """return True if the node exist in the index""" |
212 return index, cache |
243 return node in self._nodemap |
213 |
244 |
214 |
245 def rev(self, node): |
215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'): |
246 """return a revision for a node |
216 |
247 |
217 def parse_index_v1_nodemap(data, inline): |
248 If the node is unknown, raise a RevlogError""" |
218 index, cache = parsers.parse_index_devel_nodemap(data, inline) |
249 return self._nodemap[node] |
219 return index, cache |
250 |
220 |
251 def get_rev(self, node): |
221 |
252 """return a revision for a node |
222 else: |
253 |
223 parse_index_v1_nodemap = None |
254 If the node is unknown, return None""" |
224 |
255 return self._nodemap.get(node) |
225 |
256 |
226 def parse_index_v1_mixed(data, inline): |
257 def append(self, tup): |
227 index, cache = parse_index_v1(data, inline) |
258 self._nodemap[tup[7]] = len(self) |
228 return rustrevlog.MixedIndex(index), cache |
259 super(revlogoldindex, self).append(tup) |
|
260 |
|
261 def __delitem__(self, i): |
|
262 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None: |
|
263 raise ValueError(b"deleting slices only supports a:-1 with step 1") |
|
264 for r in pycompat.xrange(i.start, len(self)): |
|
265 del self._nodemap[self[r][7]] |
|
266 super(revlogoldindex, self).__delitem__(i) |
|
267 |
|
268 def clearcaches(self): |
|
269 self.__dict__.pop('_nodemap', None) |
|
270 |
|
271 def __getitem__(self, i): |
|
272 if i == -1: |
|
273 return (0, 0, 0, -1, -1, -1, -1, nullid) |
|
274 return list.__getitem__(self, i) |
|
275 |
|
276 |
|
277 class revlogoldio(object): |
|
278 def parseindex(self, data, inline): |
|
279 s = INDEX_ENTRY_V0.size |
|
280 index = [] |
|
281 nodemap = nodemaputil.NodeMap({nullid: nullrev}) |
|
282 n = off = 0 |
|
283 l = len(data) |
|
284 while off + s <= l: |
|
285 cur = data[off : off + s] |
|
286 off += s |
|
287 e = INDEX_ENTRY_V0.unpack(cur) |
|
288 # transform to revlogv1 format |
|
289 e2 = ( |
|
290 offset_type(e[0], 0), |
|
291 e[1], |
|
292 -1, |
|
293 e[2], |
|
294 e[3], |
|
295 nodemap.get(e[4], nullrev), |
|
296 nodemap.get(e[5], nullrev), |
|
297 e[6], |
|
298 ) |
|
299 index.append(e2) |
|
300 nodemap[e[6]] = n |
|
301 n += 1 |
|
302 |
|
303 index = revlogoldindex(index) |
|
304 return index, None |
|
305 |
|
306 def packentry(self, entry, node, version, rev): |
|
307 """return the binary representation of an entry |
|
308 |
|
309 entry: a tuple containing all the values (see index.__getitem__) |
|
310 node: a callback to convert a revision to nodeid |
|
311 version: the changelog version |
|
312 rev: the revision number |
|
313 """ |
|
314 if gettype(entry[0]): |
|
315 raise error.RevlogError( |
|
316 _(b'index entry flags need revlog version 1') |
|
317 ) |
|
318 e2 = ( |
|
319 getoffset(entry[0]), |
|
320 entry[1], |
|
321 entry[3], |
|
322 entry[4], |
|
323 node(entry[5]), |
|
324 node(entry[6]), |
|
325 entry[7], |
|
326 ) |
|
327 return INDEX_ENTRY_V0.pack(*e2) |
|
328 |
229 |
329 |
230 |
330 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte |
231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte |
331 # signed integer) |
232 # signed integer) |
332 _maxentrysize = 0x7FFFFFFF |
233 _maxentrysize = 0x7FFFFFFF |
333 |
234 |
334 |
235 FILE_TOO_SHORT_MSG = _( |
335 class revlogio(object): |
236 b'cannot read from revlog %s;' |
336 def parseindex(self, data, inline): |
237 b' expected %d bytes from offset %d, data size is %d' |
337 # call the C implementation to parse the index data |
238 ) |
338 index, cache = parsers.parse_index2(data, inline) |
|
339 return index, cache |
|
340 |
|
341 def packentry(self, entry, node, version, rev): |
|
342 p = INDEX_ENTRY_V1.pack(*entry) |
|
343 if rev == 0: |
|
344 p = INDEX_HEADER.pack(version) + p[4:] |
|
345 return p |
|
346 |
|
347 |
|
348 class revlogv2io(object): |
|
349 def parseindex(self, data, inline): |
|
350 index, cache = parsers.parse_index2(data, inline, revlogv2=True) |
|
351 return index, cache |
|
352 |
|
353 def packentry(self, entry, node, version, rev): |
|
354 p = INDEX_ENTRY_V2.pack(*entry) |
|
355 if rev == 0: |
|
356 p = INDEX_HEADER.pack(version) + p[4:] |
|
357 return p |
|
358 |
|
359 |
|
360 NodemapRevlogIO = None |
|
361 |
|
362 if util.safehasattr(parsers, 'parse_index_devel_nodemap'): |
|
363 |
|
364 class NodemapRevlogIO(revlogio): |
|
365 """A debug oriented IO class that return a PersistentNodeMapIndexObject |
|
366 |
|
367 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature. |
|
368 """ |
|
369 |
|
370 def parseindex(self, data, inline): |
|
371 index, cache = parsers.parse_index_devel_nodemap(data, inline) |
|
372 return index, cache |
|
373 |
|
374 |
|
375 class rustrevlogio(revlogio): |
|
376 def parseindex(self, data, inline): |
|
377 index, cache = super(rustrevlogio, self).parseindex(data, inline) |
|
378 return rustrevlog.MixedIndex(index), cache |
|
379 |
239 |
380 |
240 |
381 class revlog(object): |
241 class revlog(object): |
382 """ |
242 """ |
383 the underlying revision storage object |
243 the underlying revision storage object |
417 |
277 |
418 `concurrencychecker` is an optional function that receives 3 arguments: a |
278 `concurrencychecker` is an optional function that receives 3 arguments: a |
419 file handle, a filename, and an expected position. It should check whether |
279 file handle, a filename, and an expected position. It should check whether |
420 the current position in the file handle is valid, and log/warn/fail (by |
280 the current position in the file handle is valid, and log/warn/fail (by |
421 raising). |
281 raising). |
|
282 |
|
283 See mercurial/revlogutils/contants.py for details about the content of an |
|
284 index entry. |
422 """ |
285 """ |
423 |
286 |
424 _flagserrorclass = error.RevlogError |
287 _flagserrorclass = error.RevlogError |
425 |
288 |
426 def __init__( |
289 def __init__( |
427 self, |
290 self, |
428 opener, |
291 opener, |
429 indexfile, |
292 target, |
430 datafile=None, |
293 radix, |
|
294 postfix=None, # only exist for `tmpcensored` now |
431 checkambig=False, |
295 checkambig=False, |
432 mmaplargeindex=False, |
296 mmaplargeindex=False, |
433 censorable=False, |
297 censorable=False, |
434 upperboundcomp=None, |
298 upperboundcomp=None, |
435 persistentnodemap=False, |
299 persistentnodemap=False, |
436 concurrencychecker=None, |
300 concurrencychecker=None, |
|
301 trypending=False, |
437 ): |
302 ): |
438 """ |
303 """ |
439 create a revlog object |
304 create a revlog object |
440 |
305 |
441 opener is a function that abstracts the file opening operation |
306 opener is a function that abstracts the file opening operation |
442 and can be used to implement COW semantics or the like. |
307 and can be used to implement COW semantics or the like. |
443 |
308 |
|
309 `target`: a (KIND, ID) tuple that identify the content stored in |
|
310 this revlog. It help the rest of the code to understand what the revlog |
|
311 is about without having to resort to heuristic and index filename |
|
312 analysis. Note: that this must be reliably be set by normal code, but |
|
313 that test, debug, or performance measurement code might not set this to |
|
314 accurate value. |
444 """ |
315 """ |
445 self.upperboundcomp = upperboundcomp |
316 self.upperboundcomp = upperboundcomp |
446 self.indexfile = indexfile |
317 |
447 self.datafile = datafile or (indexfile[:-2] + b".d") |
318 self.radix = radix |
448 self.nodemap_file = None |
319 |
|
320 self._docket_file = None |
|
321 self._indexfile = None |
|
322 self._datafile = None |
|
323 self._sidedatafile = None |
|
324 self._nodemap_file = None |
|
325 self.postfix = postfix |
|
326 self._trypending = trypending |
|
327 self.opener = opener |
449 if persistentnodemap: |
328 if persistentnodemap: |
450 self.nodemap_file = nodemaputil.get_nodemap_file( |
329 self._nodemap_file = nodemaputil.get_nodemap_file(self) |
451 opener, self.indexfile |
330 |
452 ) |
331 assert target[0] in ALL_KINDS |
453 |
332 assert len(target) == 2 |
454 self.opener = opener |
333 self.target = target |
455 # When True, indexfile is opened with checkambig=True at writing, to |
334 # When True, indexfile is opened with checkambig=True at writing, to |
456 # avoid file stat ambiguity. |
335 # avoid file stat ambiguity. |
457 self._checkambig = checkambig |
336 self._checkambig = checkambig |
458 self._mmaplargeindex = mmaplargeindex |
337 self._mmaplargeindex = mmaplargeindex |
459 self._censorable = censorable |
338 self._censorable = censorable |
466 # How much data to read and cache into the raw revlog data cache. |
345 # How much data to read and cache into the raw revlog data cache. |
467 self._chunkcachesize = 65536 |
346 self._chunkcachesize = 65536 |
468 self._maxchainlen = None |
347 self._maxchainlen = None |
469 self._deltabothparents = True |
348 self._deltabothparents = True |
470 self.index = None |
349 self.index = None |
|
350 self._docket = None |
471 self._nodemap_docket = None |
351 self._nodemap_docket = None |
472 # Mapping of partial identifiers to full nodes. |
352 # Mapping of partial identifiers to full nodes. |
473 self._pcache = {} |
353 self._pcache = {} |
474 # Mapping of revision integer to full node. |
354 # Mapping of revision integer to full node. |
475 self._compengine = b'zlib' |
355 self._compengine = b'zlib' |
476 self._compengineopts = {} |
356 self._compengineopts = {} |
477 self._maxdeltachainspan = -1 |
357 self._maxdeltachainspan = -1 |
478 self._withsparseread = False |
358 self._withsparseread = False |
479 self._sparserevlog = False |
359 self._sparserevlog = False |
|
360 self.hassidedata = False |
480 self._srdensitythreshold = 0.50 |
361 self._srdensitythreshold = 0.50 |
481 self._srmingapsize = 262144 |
362 self._srmingapsize = 262144 |
482 |
363 |
483 # Make copy of flag processors so each revlog instance can support |
364 # Make copy of flag processors so each revlog instance can support |
484 # custom flags. |
365 # custom flags. |
485 self._flagprocessors = dict(flagutil.flagprocessors) |
366 self._flagprocessors = dict(flagutil.flagprocessors) |
486 |
367 |
487 # 2-tuple of file handles being used for active writing. |
368 # 3-tuple of file handles being used for active writing. |
488 self._writinghandles = None |
369 self._writinghandles = None |
|
370 # prevent nesting of addgroup |
|
371 self._adding_group = None |
489 |
372 |
490 self._loadindex() |
373 self._loadindex() |
491 |
374 |
492 self._concurrencychecker = concurrencychecker |
375 self._concurrencychecker = concurrencychecker |
493 |
376 |
494 def _loadindex(self): |
377 def _init_opts(self): |
|
378 """process options (from above/config) to setup associated default revlog mode |
|
379 |
|
380 These values might be affected when actually reading on disk information. |
|
381 |
|
382 The relevant values are returned for use in _loadindex(). |
|
383 |
|
384 * newversionflags: |
|
385 version header to use if we need to create a new revlog |
|
386 |
|
387 * mmapindexthreshold: |
|
388 minimal index size for start to use mmap |
|
389 |
|
390 * force_nodemap: |
|
391 force the usage of a "development" version of the nodemap code |
|
392 """ |
495 mmapindexthreshold = None |
393 mmapindexthreshold = None |
496 opts = self.opener.options |
394 opts = self.opener.options |
497 |
395 |
498 if b'revlogv2' in opts: |
396 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG: |
499 newversionflags = REVLOGV2 | FLAG_INLINE_DATA |
397 new_header = CHANGELOGV2 |
|
398 elif b'revlogv2' in opts: |
|
399 new_header = REVLOGV2 |
500 elif b'revlogv1' in opts: |
400 elif b'revlogv1' in opts: |
501 newversionflags = REVLOGV1 | FLAG_INLINE_DATA |
401 new_header = REVLOGV1 | FLAG_INLINE_DATA |
502 if b'generaldelta' in opts: |
402 if b'generaldelta' in opts: |
503 newversionflags |= FLAG_GENERALDELTA |
403 new_header |= FLAG_GENERALDELTA |
504 elif b'revlogv0' in self.opener.options: |
404 elif b'revlogv0' in self.opener.options: |
505 newversionflags = REVLOGV0 |
405 new_header = REVLOGV0 |
506 else: |
406 else: |
507 newversionflags = REVLOG_DEFAULT_VERSION |
407 new_header = REVLOG_DEFAULT_VERSION |
508 |
408 |
509 if b'chunkcachesize' in opts: |
409 if b'chunkcachesize' in opts: |
510 self._chunkcachesize = opts[b'chunkcachesize'] |
410 self._chunkcachesize = opts[b'chunkcachesize'] |
511 if b'maxchainlen' in opts: |
411 if b'maxchainlen' in opts: |
512 self._maxchainlen = opts[b'maxchainlen'] |
412 self._maxchainlen = opts[b'maxchainlen'] |
552 elif self._chunkcachesize & (self._chunkcachesize - 1): |
451 elif self._chunkcachesize & (self._chunkcachesize - 1): |
553 raise error.RevlogError( |
452 raise error.RevlogError( |
554 _(b'revlog chunk cache size %r is not a power of 2') |
453 _(b'revlog chunk cache size %r is not a power of 2') |
555 % self._chunkcachesize |
454 % self._chunkcachesize |
556 ) |
455 ) |
557 |
456 force_nodemap = opts.get(b'devel-force-nodemap', False) |
558 indexdata = b'' |
457 return new_header, mmapindexthreshold, force_nodemap |
559 self._initempty = True |
458 |
|
459 def _get_data(self, filepath, mmap_threshold, size=None): |
|
460 """return a file content with or without mmap |
|
461 |
|
462 If the file is missing return the empty string""" |
560 try: |
463 try: |
561 with self._indexfp() as f: |
464 with self.opener(filepath) as fp: |
562 if ( |
465 if mmap_threshold is not None: |
563 mmapindexthreshold is not None |
466 file_size = self.opener.fstat(fp).st_size |
564 and self.opener.fstat(f).st_size >= mmapindexthreshold |
467 if file_size >= mmap_threshold: |
565 ): |
468 if size is not None: |
566 # TODO: should .close() to release resources without |
469 # avoid potentiel mmap crash |
567 # relying on Python GC |
470 size = min(file_size, size) |
568 indexdata = util.buffer(util.mmapread(f)) |
471 # TODO: should .close() to release resources without |
|
472 # relying on Python GC |
|
473 if size is None: |
|
474 return util.buffer(util.mmapread(fp)) |
|
475 else: |
|
476 return util.buffer(util.mmapread(fp, size)) |
|
477 if size is None: |
|
478 return fp.read() |
569 else: |
479 else: |
570 indexdata = f.read() |
480 return fp.read(size) |
571 if len(indexdata) > 0: |
|
572 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0] |
|
573 self._initempty = False |
|
574 else: |
|
575 versionflags = newversionflags |
|
576 except IOError as inst: |
481 except IOError as inst: |
577 if inst.errno != errno.ENOENT: |
482 if inst.errno != errno.ENOENT: |
578 raise |
483 raise |
579 |
484 return b'' |
580 versionflags = newversionflags |
485 |
581 |
486 def _loadindex(self, docket=None): |
582 self.version = versionflags |
487 |
583 |
488 new_header, mmapindexthreshold, force_nodemap = self._init_opts() |
584 flags = versionflags & ~0xFFFF |
489 |
585 fmt = versionflags & 0xFFFF |
490 if self.postfix is not None: |
586 |
491 entry_point = b'%s.i.%s' % (self.radix, self.postfix) |
587 if fmt == REVLOGV0: |
492 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix): |
588 if flags: |
493 entry_point = b'%s.i.a' % self.radix |
589 raise error.RevlogError( |
494 else: |
590 _(b'unknown flags (%#04x) in version %d revlog %s') |
495 entry_point = b'%s.i' % self.radix |
591 % (flags >> 16, fmt, self.indexfile) |
496 |
|
497 if docket is not None: |
|
498 self._docket = docket |
|
499 self._docket_file = entry_point |
|
500 else: |
|
501 entry_data = b'' |
|
502 self._initempty = True |
|
503 entry_data = self._get_data(entry_point, mmapindexthreshold) |
|
504 if len(entry_data) > 0: |
|
505 header = INDEX_HEADER.unpack(entry_data[:4])[0] |
|
506 self._initempty = False |
|
507 else: |
|
508 header = new_header |
|
509 |
|
510 self._format_flags = header & ~0xFFFF |
|
511 self._format_version = header & 0xFFFF |
|
512 |
|
513 supported_flags = SUPPORTED_FLAGS.get(self._format_version) |
|
514 if supported_flags is None: |
|
515 msg = _(b'unknown version (%d) in revlog %s') |
|
516 msg %= (self._format_version, self.display_id) |
|
517 raise error.RevlogError(msg) |
|
518 elif self._format_flags & ~supported_flags: |
|
519 msg = _(b'unknown flags (%#04x) in version %d revlog %s') |
|
520 display_flag = self._format_flags >> 16 |
|
521 msg %= (display_flag, self._format_version, self.display_id) |
|
522 raise error.RevlogError(msg) |
|
523 |
|
524 features = FEATURES_BY_VERSION[self._format_version] |
|
525 self._inline = features[b'inline'](self._format_flags) |
|
526 self._generaldelta = features[b'generaldelta'](self._format_flags) |
|
527 self.hassidedata = features[b'sidedata'] |
|
528 |
|
529 if not features[b'docket']: |
|
530 self._indexfile = entry_point |
|
531 index_data = entry_data |
|
532 else: |
|
533 self._docket_file = entry_point |
|
534 if self._initempty: |
|
535 self._docket = docketutil.default_docket(self, header) |
|
536 else: |
|
537 self._docket = docketutil.parse_docket( |
|
538 self, entry_data, use_pending=self._trypending |
|
539 ) |
|
540 |
|
541 if self._docket is not None: |
|
542 self._indexfile = self._docket.index_filepath() |
|
543 index_data = b'' |
|
544 index_size = self._docket.index_end |
|
545 if index_size > 0: |
|
546 index_data = self._get_data( |
|
547 self._indexfile, mmapindexthreshold, size=index_size |
592 ) |
548 ) |
593 |
549 if len(index_data) < index_size: |
594 self._inline = False |
550 msg = _(b'too few index data for %s: got %d, expected %d') |
595 self._generaldelta = False |
551 msg %= (self.display_id, len(index_data), index_size) |
596 |
552 raise error.RevlogError(msg) |
597 elif fmt == REVLOGV1: |
553 |
598 if flags & ~REVLOGV1_FLAGS: |
|
599 raise error.RevlogError( |
|
600 _(b'unknown flags (%#04x) in version %d revlog %s') |
|
601 % (flags >> 16, fmt, self.indexfile) |
|
602 ) |
|
603 |
|
604 self._inline = versionflags & FLAG_INLINE_DATA |
|
605 self._generaldelta = versionflags & FLAG_GENERALDELTA |
|
606 |
|
607 elif fmt == REVLOGV2: |
|
608 if flags & ~REVLOGV2_FLAGS: |
|
609 raise error.RevlogError( |
|
610 _(b'unknown flags (%#04x) in version %d revlog %s') |
|
611 % (flags >> 16, fmt, self.indexfile) |
|
612 ) |
|
613 |
|
614 # There is a bug in the transaction handling when going from an |
|
615 # inline revlog to a separate index and data file. Turn it off until |
|
616 # it's fixed, since v2 revlogs sometimes get rewritten on exchange. |
|
617 # See issue6485 |
|
618 self._inline = False |
554 self._inline = False |
619 # generaldelta implied by version 2 revlogs. |
555 # generaldelta implied by version 2 revlogs. |
620 self._generaldelta = True |
556 self._generaldelta = True |
621 |
557 # the logic for persistent nodemap will be dealt with within the |
|
558 # main docket, so disable it for now. |
|
559 self._nodemap_file = None |
|
560 |
|
561 if self._docket is not None: |
|
562 self._datafile = self._docket.data_filepath() |
|
563 self._sidedatafile = self._docket.sidedata_filepath() |
|
564 elif self.postfix is None: |
|
565 self._datafile = b'%s.d' % self.radix |
622 else: |
566 else: |
623 raise error.RevlogError( |
567 self._datafile = b'%s.d.%s' % (self.radix, self.postfix) |
624 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile) |
|
625 ) |
|
626 |
568 |
627 self.nodeconstants = sha1nodeconstants |
569 self.nodeconstants = sha1nodeconstants |
628 self.nullid = self.nodeconstants.nullid |
570 self.nullid = self.nodeconstants.nullid |
629 |
571 |
630 # sparse-revlog can't be on without general-delta (issue6056) |
572 # sparse-revlog can't be on without general-delta (issue6056) |
632 self._sparserevlog = False |
574 self._sparserevlog = False |
633 |
575 |
634 self._storedeltachains = True |
576 self._storedeltachains = True |
635 |
577 |
636 devel_nodemap = ( |
578 devel_nodemap = ( |
637 self.nodemap_file |
579 self._nodemap_file |
638 and opts.get(b'devel-force-nodemap', False) |
580 and force_nodemap |
639 and NodemapRevlogIO is not None |
581 and parse_index_v1_nodemap is not None |
640 ) |
582 ) |
641 |
583 |
642 use_rust_index = False |
584 use_rust_index = False |
643 if rustrevlog is not None: |
585 if rustrevlog is not None: |
644 if self.nodemap_file is not None: |
586 if self._nodemap_file is not None: |
645 use_rust_index = True |
587 use_rust_index = True |
646 else: |
588 else: |
647 use_rust_index = self.opener.options.get(b'rust.index') |
589 use_rust_index = self.opener.options.get(b'rust.index') |
648 |
590 |
649 self._io = revlogio() |
591 self._parse_index = parse_index_v1 |
650 if self.version == REVLOGV0: |
592 if self._format_version == REVLOGV0: |
651 self._io = revlogoldio() |
593 self._parse_index = revlogv0.parse_index_v0 |
652 elif fmt == REVLOGV2: |
594 elif self._format_version == REVLOGV2: |
653 self._io = revlogv2io() |
595 self._parse_index = parse_index_v2 |
|
596 elif self._format_version == CHANGELOGV2: |
|
597 self._parse_index = parse_index_cl_v2 |
654 elif devel_nodemap: |
598 elif devel_nodemap: |
655 self._io = NodemapRevlogIO() |
599 self._parse_index = parse_index_v1_nodemap |
656 elif use_rust_index: |
600 elif use_rust_index: |
657 self._io = rustrevlogio() |
601 self._parse_index = parse_index_v1_mixed |
658 try: |
602 try: |
659 d = self._io.parseindex(indexdata, self._inline) |
603 d = self._parse_index(index_data, self._inline) |
660 index, _chunkcache = d |
604 index, chunkcache = d |
661 use_nodemap = ( |
605 use_nodemap = ( |
662 not self._inline |
606 not self._inline |
663 and self.nodemap_file is not None |
607 and self._nodemap_file is not None |
664 and util.safehasattr(index, 'update_nodemap_data') |
608 and util.safehasattr(index, 'update_nodemap_data') |
665 ) |
609 ) |
666 if use_nodemap: |
610 if use_nodemap: |
667 nodemap_data = nodemaputil.persisted_data(self) |
611 nodemap_data = nodemaputil.persisted_data(self) |
668 if nodemap_data is not None: |
612 if nodemap_data is not None: |
674 # no changelog tampering |
618 # no changelog tampering |
675 self._nodemap_docket = docket |
619 self._nodemap_docket = docket |
676 index.update_nodemap_data(*nodemap_data) |
620 index.update_nodemap_data(*nodemap_data) |
677 except (ValueError, IndexError): |
621 except (ValueError, IndexError): |
678 raise error.RevlogError( |
622 raise error.RevlogError( |
679 _(b"index %s is corrupted") % self.indexfile |
623 _(b"index %s is corrupted") % self.display_id |
680 ) |
624 ) |
681 self.index, self._chunkcache = d |
625 self.index = index |
682 if not self._chunkcache: |
626 self._segmentfile = randomaccessfile.randomaccessfile( |
683 self._chunkclear() |
627 self.opener, |
|
628 (self._indexfile if self._inline else self._datafile), |
|
629 self._chunkcachesize, |
|
630 chunkcache, |
|
631 ) |
|
632 self._segmentfile_sidedata = randomaccessfile.randomaccessfile( |
|
633 self.opener, |
|
634 self._sidedatafile, |
|
635 self._chunkcachesize, |
|
636 ) |
684 # revnum -> (chain-length, sum-delta-length) |
637 # revnum -> (chain-length, sum-delta-length) |
685 self._chaininfocache = util.lrucachedict(500) |
638 self._chaininfocache = util.lrucachedict(500) |
686 # revlog header -> revlog compressor |
639 # revlog header -> revlog compressor |
687 self._decompressors = {} |
640 self._decompressors = {} |
688 |
641 |
689 @util.propertycache |
642 @util.propertycache |
|
643 def revlog_kind(self): |
|
644 return self.target[0] |
|
645 |
|
646 @util.propertycache |
|
647 def display_id(self): |
|
648 """The public facing "ID" of the revlog that we use in message""" |
|
649 # Maybe we should build a user facing representation of |
|
650 # revlog.target instead of using `self.radix` |
|
651 return self.radix |
|
652 |
|
653 def _get_decompressor(self, t): |
|
654 try: |
|
655 compressor = self._decompressors[t] |
|
656 except KeyError: |
|
657 try: |
|
658 engine = util.compengines.forrevlogheader(t) |
|
659 compressor = engine.revlogcompressor(self._compengineopts) |
|
660 self._decompressors[t] = compressor |
|
661 except KeyError: |
|
662 raise error.RevlogError( |
|
663 _(b'unknown compression type %s') % binascii.hexlify(t) |
|
664 ) |
|
665 return compressor |
|
666 |
|
667 @util.propertycache |
690 def _compressor(self): |
668 def _compressor(self): |
691 engine = util.compengines[self._compengine] |
669 engine = util.compengines[self._compengine] |
692 return engine.revlogcompressor(self._compengineopts) |
670 return engine.revlogcompressor(self._compengineopts) |
693 |
671 |
694 def _indexfp(self, mode=b'r'): |
672 @util.propertycache |
|
673 def _decompressor(self): |
|
674 """the default decompressor""" |
|
675 if self._docket is None: |
|
676 return None |
|
677 t = self._docket.default_compression_header |
|
678 c = self._get_decompressor(t) |
|
679 return c.decompress |
|
680 |
|
681 def _indexfp(self): |
695 """file object for the revlog's index file""" |
682 """file object for the revlog's index file""" |
696 args = {'mode': mode} |
683 return self.opener(self._indexfile, mode=b"r") |
697 if mode != b'r': |
684 |
698 args['checkambig'] = self._checkambig |
685 def __index_write_fp(self): |
699 if mode == b'w': |
686 # You should not use this directly and use `_writing` instead |
700 args['atomictemp'] = True |
687 try: |
701 return self.opener(self.indexfile, **args) |
688 f = self.opener( |
|
689 self._indexfile, mode=b"r+", checkambig=self._checkambig |
|
690 ) |
|
691 if self._docket is None: |
|
692 f.seek(0, os.SEEK_END) |
|
693 else: |
|
694 f.seek(self._docket.index_end, os.SEEK_SET) |
|
695 return f |
|
696 except IOError as inst: |
|
697 if inst.errno != errno.ENOENT: |
|
698 raise |
|
699 return self.opener( |
|
700 self._indexfile, mode=b"w+", checkambig=self._checkambig |
|
701 ) |
|
702 |
|
703 def __index_new_fp(self): |
|
704 # You should not use this unless you are upgrading from inline revlog |
|
705 return self.opener( |
|
706 self._indexfile, |
|
707 mode=b"w", |
|
708 checkambig=self._checkambig, |
|
709 atomictemp=True, |
|
710 ) |
702 |
711 |
703 def _datafp(self, mode=b'r'): |
712 def _datafp(self, mode=b'r'): |
704 """file object for the revlog's data file""" |
713 """file object for the revlog's data file""" |
705 return self.opener(self.datafile, mode=mode) |
714 return self.opener(self._datafile, mode=mode) |
706 |
715 |
707 @contextlib.contextmanager |
716 @contextlib.contextmanager |
708 def _datareadfp(self, existingfp=None): |
717 def _sidedatareadfp(self): |
709 """file object suitable to read data""" |
718 """file object suitable to read sidedata""" |
710 # Use explicit file handle, if given. |
719 if self._writinghandles: |
711 if existingfp is not None: |
720 yield self._writinghandles[2] |
712 yield existingfp |
|
713 |
|
714 # Use a file handle being actively used for writes, if available. |
|
715 # There is some danger to doing this because reads will seek the |
|
716 # file. However, _writeentry() performs a SEEK_END before all writes, |
|
717 # so we should be safe. |
|
718 elif self._writinghandles: |
|
719 if self._inline: |
|
720 yield self._writinghandles[0] |
|
721 else: |
|
722 yield self._writinghandles[1] |
|
723 |
|
724 # Otherwise open a new file handle. |
|
725 else: |
721 else: |
726 if self._inline: |
722 with self.opener(self._sidedatafile) as fp: |
727 func = self._indexfp |
|
728 else: |
|
729 func = self._datafp |
|
730 with func() as fp: |
|
731 yield fp |
723 yield fp |
732 |
724 |
733 def tiprev(self): |
725 def tiprev(self): |
734 return len(self.index) - 1 |
726 return len(self.index) - 1 |
735 |
727 |
783 ): |
775 ): |
784 return False |
776 return False |
785 return True |
777 return True |
786 |
778 |
787 def update_caches(self, transaction): |
779 def update_caches(self, transaction): |
788 if self.nodemap_file is not None: |
780 if self._nodemap_file is not None: |
789 if transaction is None: |
781 if transaction is None: |
790 nodemaputil.update_persistent_nodemap(self) |
782 nodemaputil.update_persistent_nodemap(self) |
791 else: |
783 else: |
792 nodemaputil.setup_persistent_nodemap(transaction, self) |
784 nodemaputil.setup_persistent_nodemap(transaction, self) |
793 |
785 |
794 def clearcaches(self): |
786 def clearcaches(self): |
795 self._revisioncache = None |
787 self._revisioncache = None |
796 self._chainbasecache.clear() |
788 self._chainbasecache.clear() |
797 self._chunkcache = (0, b'') |
789 self._segmentfile.clear_cache() |
|
790 self._segmentfile_sidedata.clear_cache() |
798 self._pcache = {} |
791 self._pcache = {} |
799 self._nodemap_docket = None |
792 self._nodemap_docket = None |
800 self.index.clearcaches() |
793 self.index.clearcaches() |
801 # The python code is the one responsible for validating the docket, we |
794 # The python code is the one responsible for validating the docket, we |
802 # end up having to refresh it here. |
795 # end up having to refresh it here. |
803 use_nodemap = ( |
796 use_nodemap = ( |
804 not self._inline |
797 not self._inline |
805 and self.nodemap_file is not None |
798 and self._nodemap_file is not None |
806 and util.safehasattr(self.index, 'update_nodemap_data') |
799 and util.safehasattr(self.index, 'update_nodemap_data') |
807 ) |
800 ) |
808 if use_nodemap: |
801 if use_nodemap: |
809 nodemap_data = nodemaputil.persisted_data(self) |
802 nodemap_data = nodemaputil.persisted_data(self) |
810 if nodemap_data is not None: |
803 if nodemap_data is not None: |
816 return self.index.rev(node) |
809 return self.index.rev(node) |
817 except TypeError: |
810 except TypeError: |
818 raise |
811 raise |
819 except error.RevlogError: |
812 except error.RevlogError: |
820 # parsers.c radix tree lookup failed |
813 # parsers.c radix tree lookup failed |
821 if node == wdirid or node in wdirfilenodeids: |
814 if ( |
|
815 node == self.nodeconstants.wdirid |
|
816 or node in self.nodeconstants.wdirfilenodeids |
|
817 ): |
822 raise error.WdirUnsupported |
818 raise error.WdirUnsupported |
823 raise error.LookupError(node, self.indexfile, _(b'no node')) |
819 raise error.LookupError(node, self.display_id, _(b'no node')) |
824 |
820 |
825 # Accessors for index entries. |
821 # Accessors for index entries. |
826 |
822 |
827 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes |
823 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes |
828 # are flags. |
824 # are flags. |
829 def start(self, rev): |
825 def start(self, rev): |
830 return int(self.index[rev][0] >> 16) |
826 return int(self.index[rev][0] >> 16) |
831 |
827 |
|
828 def sidedata_cut_off(self, rev): |
|
829 sd_cut_off = self.index[rev][8] |
|
830 if sd_cut_off != 0: |
|
831 return sd_cut_off |
|
832 # This is some annoying dance, because entries without sidedata |
|
833 # currently use 0 as their ofsset. (instead of previous-offset + |
|
834 # previous-size) |
|
835 # |
|
836 # We should reconsider this sidedata → 0 sidata_offset policy. |
|
837 # In the meantime, we need this. |
|
838 while 0 <= rev: |
|
839 e = self.index[rev] |
|
840 if e[9] != 0: |
|
841 return e[8] + e[9] |
|
842 rev -= 1 |
|
843 return 0 |
|
844 |
832 def flags(self, rev): |
845 def flags(self, rev): |
833 return self.index[rev][0] & 0xFFFF |
846 return self.index[rev][0] & 0xFFFF |
834 |
847 |
835 def length(self, rev): |
848 def length(self, rev): |
836 return self.index[rev][1] |
849 return self.index[rev][1] |
837 |
850 |
838 def sidedata_length(self, rev): |
851 def sidedata_length(self, rev): |
839 if self.version & 0xFFFF != REVLOGV2: |
852 if not self.hassidedata: |
840 return 0 |
853 return 0 |
841 return self.index[rev][9] |
854 return self.index[rev][9] |
842 |
855 |
843 def rawsize(self, rev): |
856 def rawsize(self, rev): |
844 """return the length of the uncompressed text for a given revision""" |
857 """return the length of the uncompressed text for a given revision""" |
1444 if rev < 0 or rev >= len(self): |
1461 if rev < 0 or rev >= len(self): |
1445 raise ValueError |
1462 raise ValueError |
1446 return self.node(rev) |
1463 return self.node(rev) |
1447 except (ValueError, OverflowError): |
1464 except (ValueError, OverflowError): |
1448 pass |
1465 pass |
1449 if len(id) == 40: |
1466 if len(id) == 2 * self.nodeconstants.nodelen: |
1450 try: |
1467 try: |
1451 # a full hex nodeid? |
1468 # a full hex nodeid? |
1452 node = bin(id) |
1469 node = bin(id) |
1453 self.rev(node) |
1470 self.rev(node) |
1454 return node |
1471 return node |
1455 except (TypeError, error.LookupError): |
1472 except (TypeError, error.LookupError): |
1456 pass |
1473 pass |
1457 |
1474 |
1458 def _partialmatch(self, id): |
1475 def _partialmatch(self, id): |
1459 # we don't care wdirfilenodeids as they should be always full hash |
1476 # we don't care wdirfilenodeids as they should be always full hash |
1460 maybewdir = wdirhex.startswith(id) |
1477 maybewdir = self.nodeconstants.wdirhex.startswith(id) |
|
1478 ambiguous = False |
1461 try: |
1479 try: |
1462 partial = self.index.partialmatch(id) |
1480 partial = self.index.partialmatch(id) |
1463 if partial and self.hasnode(partial): |
1481 if partial and self.hasnode(partial): |
1464 if maybewdir: |
1482 if maybewdir: |
1465 # single 'ff...' match in radix tree, ambiguous with wdir |
1483 # single 'ff...' match in radix tree, ambiguous with wdir |
1466 raise error.RevlogError |
1484 ambiguous = True |
1467 return partial |
1485 else: |
1468 if maybewdir: |
1486 return partial |
|
1487 elif maybewdir: |
1469 # no 'ff...' match in radix tree, wdir identified |
1488 # no 'ff...' match in radix tree, wdir identified |
1470 raise error.WdirUnsupported |
1489 raise error.WdirUnsupported |
1471 return None |
1490 else: |
|
1491 return None |
1472 except error.RevlogError: |
1492 except error.RevlogError: |
1473 # parsers.c radix tree lookup gave multiple matches |
1493 # parsers.c radix tree lookup gave multiple matches |
1474 # fast path: for unfiltered changelog, radix tree is accurate |
1494 # fast path: for unfiltered changelog, radix tree is accurate |
1475 if not getattr(self, 'filteredrevs', None): |
1495 if not getattr(self, 'filteredrevs', None): |
1476 raise error.AmbiguousPrefixLookupError( |
1496 ambiguous = True |
1477 id, self.indexfile, _(b'ambiguous identifier') |
|
1478 ) |
|
1479 # fall through to slow path that filters hidden revisions |
1497 # fall through to slow path that filters hidden revisions |
1480 except (AttributeError, ValueError): |
1498 except (AttributeError, ValueError): |
1481 # we are pure python, or key was too short to search radix tree |
1499 # we are pure python, or key was too short to search radix tree |
1482 pass |
1500 pass |
|
1501 if ambiguous: |
|
1502 raise error.AmbiguousPrefixLookupError( |
|
1503 id, self.display_id, _(b'ambiguous identifier') |
|
1504 ) |
1483 |
1505 |
1484 if id in self._pcache: |
1506 if id in self._pcache: |
1485 return self._pcache[id] |
1507 return self._pcache[id] |
1486 |
1508 |
1487 if len(id) <= 40: |
1509 if len(id) <= 40: |
1576 |
1600 |
1577 returns True if text is different than what is stored. |
1601 returns True if text is different than what is stored. |
1578 """ |
1602 """ |
1579 p1, p2 = self.parents(node) |
1603 p1, p2 = self.parents(node) |
1580 return storageutil.hashrevisionsha1(text, p1, p2) != node |
1604 return storageutil.hashrevisionsha1(text, p1, p2) != node |
1581 |
|
1582 def _cachesegment(self, offset, data): |
|
1583 """Add a segment to the revlog cache. |
|
1584 |
|
1585 Accepts an absolute offset and the data that is at that location. |
|
1586 """ |
|
1587 o, d = self._chunkcache |
|
1588 # try to add to existing cache |
|
1589 if o + len(d) == offset and len(d) + len(data) < _chunksize: |
|
1590 self._chunkcache = o, d + data |
|
1591 else: |
|
1592 self._chunkcache = offset, data |
|
1593 |
|
1594 def _readsegment(self, offset, length, df=None): |
|
1595 """Load a segment of raw data from the revlog. |
|
1596 |
|
1597 Accepts an absolute offset, length to read, and an optional existing |
|
1598 file handle to read from. |
|
1599 |
|
1600 If an existing file handle is passed, it will be seeked and the |
|
1601 original seek position will NOT be restored. |
|
1602 |
|
1603 Returns a str or buffer of raw byte data. |
|
1604 |
|
1605 Raises if the requested number of bytes could not be read. |
|
1606 """ |
|
1607 # Cache data both forward and backward around the requested |
|
1608 # data, in a fixed size window. This helps speed up operations |
|
1609 # involving reading the revlog backwards. |
|
1610 cachesize = self._chunkcachesize |
|
1611 realoffset = offset & ~(cachesize - 1) |
|
1612 reallength = ( |
|
1613 (offset + length + cachesize) & ~(cachesize - 1) |
|
1614 ) - realoffset |
|
1615 with self._datareadfp(df) as df: |
|
1616 df.seek(realoffset) |
|
1617 d = df.read(reallength) |
|
1618 |
|
1619 self._cachesegment(realoffset, d) |
|
1620 if offset != realoffset or reallength != length: |
|
1621 startoffset = offset - realoffset |
|
1622 if len(d) - startoffset < length: |
|
1623 raise error.RevlogError( |
|
1624 _( |
|
1625 b'partial read of revlog %s; expected %d bytes from ' |
|
1626 b'offset %d, got %d' |
|
1627 ) |
|
1628 % ( |
|
1629 self.indexfile if self._inline else self.datafile, |
|
1630 length, |
|
1631 realoffset, |
|
1632 len(d) - startoffset, |
|
1633 ) |
|
1634 ) |
|
1635 |
|
1636 return util.buffer(d, startoffset, length) |
|
1637 |
|
1638 if len(d) < length: |
|
1639 raise error.RevlogError( |
|
1640 _( |
|
1641 b'partial read of revlog %s; expected %d bytes from offset ' |
|
1642 b'%d, got %d' |
|
1643 ) |
|
1644 % ( |
|
1645 self.indexfile if self._inline else self.datafile, |
|
1646 length, |
|
1647 offset, |
|
1648 len(d), |
|
1649 ) |
|
1650 ) |
|
1651 |
|
1652 return d |
|
1653 |
|
1654 def _getsegment(self, offset, length, df=None): |
|
1655 """Obtain a segment of raw data from the revlog. |
|
1656 |
|
1657 Accepts an absolute offset, length of bytes to obtain, and an |
|
1658 optional file handle to the already-opened revlog. If the file |
|
1659 handle is used, it's original seek position will not be preserved. |
|
1660 |
|
1661 Requests for data may be returned from a cache. |
|
1662 |
|
1663 Returns a str or a buffer instance of raw byte data. |
|
1664 """ |
|
1665 o, d = self._chunkcache |
|
1666 l = len(d) |
|
1667 |
|
1668 # is it in the cache? |
|
1669 cachestart = offset - o |
|
1670 cacheend = cachestart + length |
|
1671 if cachestart >= 0 and cacheend <= l: |
|
1672 if cachestart == 0 and cacheend == l: |
|
1673 return d # avoid a copy |
|
1674 return util.buffer(d, cachestart, cacheend - cachestart) |
|
1675 |
|
1676 return self._readsegment(offset, length, df=df) |
|
1677 |
1605 |
1678 def _getsegmentforrevs(self, startrev, endrev, df=None): |
1606 def _getsegmentforrevs(self, startrev, endrev, df=None): |
1679 """Obtain a segment of raw data corresponding to a range of revisions. |
1607 """Obtain a segment of raw data corresponding to a range of revisions. |
1680 |
1608 |
1681 Accepts the start and end revisions and an optional already-open |
1609 Accepts the start and end revisions and an optional already-open |
1705 if self._inline: |
1633 if self._inline: |
1706 start += (startrev + 1) * self.index.entry_size |
1634 start += (startrev + 1) * self.index.entry_size |
1707 end += (endrev + 1) * self.index.entry_size |
1635 end += (endrev + 1) * self.index.entry_size |
1708 length = end - start |
1636 length = end - start |
1709 |
1637 |
1710 return start, self._getsegment(start, length, df=df) |
1638 return start, self._segmentfile.read_chunk(start, length, df) |
1711 |
1639 |
1712 def _chunk(self, rev, df=None): |
1640 def _chunk(self, rev, df=None): |
1713 """Obtain a single decompressed chunk for a revision. |
1641 """Obtain a single decompressed chunk for a revision. |
1714 |
1642 |
1715 Accepts an integer revision and an optional already-open file handle |
1643 Accepts an integer revision and an optional already-open file handle |
1716 to be used for reading. If used, the seek position of the file will not |
1644 to be used for reading. If used, the seek position of the file will not |
1717 be preserved. |
1645 be preserved. |
1718 |
1646 |
1719 Returns a str holding uncompressed data for the requested revision. |
1647 Returns a str holding uncompressed data for the requested revision. |
1720 """ |
1648 """ |
1721 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1]) |
1649 compression_mode = self.index[rev][10] |
|
1650 data = self._getsegmentforrevs(rev, rev, df=df)[1] |
|
1651 if compression_mode == COMP_MODE_PLAIN: |
|
1652 return data |
|
1653 elif compression_mode == COMP_MODE_DEFAULT: |
|
1654 return self._decompressor(data) |
|
1655 elif compression_mode == COMP_MODE_INLINE: |
|
1656 return self.decompress(data) |
|
1657 else: |
|
1658 msg = b'unknown compression mode %d' |
|
1659 msg %= compression_mode |
|
1660 raise error.RevlogError(msg) |
1722 |
1661 |
1723 def _chunks(self, revs, df=None, targetsize=None): |
1662 def _chunks(self, revs, df=None, targetsize=None): |
1724 """Obtain decompressed chunks for the specified revisions. |
1663 """Obtain decompressed chunks for the specified revisions. |
1725 |
1664 |
1726 Accepts an iterable of numeric revisions that are assumed to be in |
1665 Accepts an iterable of numeric revisions that are assumed to be in |
1764 # issue4215 - we can't cache a run of chunks greater than |
1703 # issue4215 - we can't cache a run of chunks greater than |
1765 # 2G on Windows |
1704 # 2G on Windows |
1766 return [self._chunk(rev, df=df) for rev in revschunk] |
1705 return [self._chunk(rev, df=df) for rev in revschunk] |
1767 |
1706 |
1768 decomp = self.decompress |
1707 decomp = self.decompress |
|
1708 # self._decompressor might be None, but will not be used in that case |
|
1709 def_decomp = self._decompressor |
1769 for rev in revschunk: |
1710 for rev in revschunk: |
1770 chunkstart = start(rev) |
1711 chunkstart = start(rev) |
1771 if inline: |
1712 if inline: |
1772 chunkstart += (rev + 1) * iosize |
1713 chunkstart += (rev + 1) * iosize |
1773 chunklength = length(rev) |
1714 chunklength = length(rev) |
1774 ladd(decomp(buffer(data, chunkstart - offset, chunklength))) |
1715 comp_mode = self.index[rev][10] |
|
1716 c = buffer(data, chunkstart - offset, chunklength) |
|
1717 if comp_mode == COMP_MODE_PLAIN: |
|
1718 ladd(c) |
|
1719 elif comp_mode == COMP_MODE_INLINE: |
|
1720 ladd(decomp(c)) |
|
1721 elif comp_mode == COMP_MODE_DEFAULT: |
|
1722 ladd(def_decomp(c)) |
|
1723 else: |
|
1724 msg = b'unknown compression mode %d' |
|
1725 msg %= comp_mode |
|
1726 raise error.RevlogError(msg) |
1775 |
1727 |
1776 return l |
1728 return l |
1777 |
|
1778 def _chunkclear(self): |
|
1779 """Clear the raw chunk cache.""" |
|
1780 self._chunkcache = (0, b'') |
|
1781 |
1729 |
1782 def deltaparent(self, rev): |
1730 def deltaparent(self, rev): |
1783 """return deltaparent of the given revision""" |
1731 """return deltaparent of the given revision""" |
1784 base = self.index[rev][3] |
1732 base = self.index[rev][3] |
1785 if base == rev: |
1733 if base == rev: |
1852 msg = ( |
1800 msg = ( |
1853 b'revlog.revision(..., raw=True) is deprecated, ' |
1801 b'revlog.revision(..., raw=True) is deprecated, ' |
1854 b'use revlog.rawdata(...)' |
1802 b'use revlog.rawdata(...)' |
1855 ) |
1803 ) |
1856 util.nouideprecwarn(msg, b'5.2', stacklevel=2) |
1804 util.nouideprecwarn(msg, b'5.2', stacklevel=2) |
1857 return self._revisiondata(nodeorrev, _df, raw=raw)[0] |
1805 return self._revisiondata(nodeorrev, _df, raw=raw) |
1858 |
1806 |
1859 def sidedata(self, nodeorrev, _df=None): |
1807 def sidedata(self, nodeorrev, _df=None): |
1860 """a map of extra data related to the changeset but not part of the hash |
1808 """a map of extra data related to the changeset but not part of the hash |
1861 |
1809 |
1862 This function currently return a dictionary. However, more advanced |
1810 This function currently return a dictionary. However, more advanced |
1863 mapping object will likely be used in the future for a more |
1811 mapping object will likely be used in the future for a more |
1864 efficient/lazy code. |
1812 efficient/lazy code. |
1865 """ |
1813 """ |
1866 return self._revisiondata(nodeorrev, _df)[1] |
1814 # deal with <nodeorrev> argument type |
|
1815 if isinstance(nodeorrev, int): |
|
1816 rev = nodeorrev |
|
1817 else: |
|
1818 rev = self.rev(nodeorrev) |
|
1819 return self._sidedata(rev) |
1867 |
1820 |
1868 def _revisiondata(self, nodeorrev, _df=None, raw=False): |
1821 def _revisiondata(self, nodeorrev, _df=None, raw=False): |
1869 # deal with <nodeorrev> argument type |
1822 # deal with <nodeorrev> argument type |
1870 if isinstance(nodeorrev, int): |
1823 if isinstance(nodeorrev, int): |
1871 rev = nodeorrev |
1824 rev = nodeorrev |
1873 else: |
1826 else: |
1874 node = nodeorrev |
1827 node = nodeorrev |
1875 rev = None |
1828 rev = None |
1876 |
1829 |
1877 # fast path the special `nullid` rev |
1830 # fast path the special `nullid` rev |
1878 if node == nullid: |
1831 if node == self.nullid: |
1879 return b"", {} |
1832 return b"" |
1880 |
1833 |
1881 # ``rawtext`` is the text as stored inside the revlog. Might be the |
1834 # ``rawtext`` is the text as stored inside the revlog. Might be the |
1882 # revision or might need to be processed to retrieve the revision. |
1835 # revision or might need to be processed to retrieve the revision. |
1883 rev, rawtext, validated = self._rawtext(node, rev, _df=_df) |
1836 rev, rawtext, validated = self._rawtext(node, rev, _df=_df) |
1884 |
1837 |
1885 if self.version & 0xFFFF == REVLOGV2: |
|
1886 if rev is None: |
|
1887 rev = self.rev(node) |
|
1888 sidedata = self._sidedata(rev) |
|
1889 else: |
|
1890 sidedata = {} |
|
1891 |
|
1892 if raw and validated: |
1838 if raw and validated: |
1893 # if we don't want to process the raw text and that raw |
1839 # if we don't want to process the raw text and that raw |
1894 # text is cached, we can exit early. |
1840 # text is cached, we can exit early. |
1895 return rawtext, sidedata |
1841 return rawtext |
1896 if rev is None: |
1842 if rev is None: |
1897 rev = self.rev(node) |
1843 rev = self.rev(node) |
1898 # the revlog's flag for this revision |
1844 # the revlog's flag for this revision |
1899 # (usually alter its state or content) |
1845 # (usually alter its state or content) |
1900 flags = self.flags(rev) |
1846 flags = self.flags(rev) |
1901 |
1847 |
1902 if validated and flags == REVIDX_DEFAULT_FLAGS: |
1848 if validated and flags == REVIDX_DEFAULT_FLAGS: |
1903 # no extra flags set, no flag processor runs, text = rawtext |
1849 # no extra flags set, no flag processor runs, text = rawtext |
1904 return rawtext, sidedata |
1850 return rawtext |
1905 |
1851 |
1906 if raw: |
1852 if raw: |
1907 validatehash = flagutil.processflagsraw(self, rawtext, flags) |
1853 validatehash = flagutil.processflagsraw(self, rawtext, flags) |
1908 text = rawtext |
1854 text = rawtext |
1909 else: |
1855 else: |
1968 if self._inline: |
1914 if self._inline: |
1969 sidedata_offset += self.index.entry_size * (1 + rev) |
1915 sidedata_offset += self.index.entry_size * (1 + rev) |
1970 if sidedata_size == 0: |
1916 if sidedata_size == 0: |
1971 return {} |
1917 return {} |
1972 |
1918 |
1973 segment = self._getsegment(sidedata_offset, sidedata_size) |
1919 if self._docket.sidedata_end < sidedata_offset + sidedata_size: |
|
1920 filename = self._sidedatafile |
|
1921 end = self._docket.sidedata_end |
|
1922 offset = sidedata_offset |
|
1923 length = sidedata_size |
|
1924 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end) |
|
1925 raise error.RevlogError(m) |
|
1926 |
|
1927 comp_segment = self._segmentfile_sidedata.read_chunk( |
|
1928 sidedata_offset, sidedata_size |
|
1929 ) |
|
1930 |
|
1931 comp = self.index[rev][11] |
|
1932 if comp == COMP_MODE_PLAIN: |
|
1933 segment = comp_segment |
|
1934 elif comp == COMP_MODE_DEFAULT: |
|
1935 segment = self._decompressor(comp_segment) |
|
1936 elif comp == COMP_MODE_INLINE: |
|
1937 segment = self.decompress(comp_segment) |
|
1938 else: |
|
1939 msg = b'unknown compression mode %d' |
|
1940 msg %= comp |
|
1941 raise error.RevlogError(msg) |
|
1942 |
1974 sidedata = sidedatautil.deserialize_sidedata(segment) |
1943 sidedata = sidedatautil.deserialize_sidedata(segment) |
1975 return sidedata |
1944 return sidedata |
1976 |
1945 |
1977 def rawdata(self, nodeorrev, _df=None): |
1946 def rawdata(self, nodeorrev, _df=None): |
1978 """return an uncompressed raw data of a given node or revision number. |
1947 """return an uncompressed raw data of a given node or revision number. |
1979 |
1948 |
1980 _df - an existing file handle to read from. (internal-only) |
1949 _df - an existing file handle to read from. (internal-only) |
1981 """ |
1950 """ |
1982 return self._revisiondata(nodeorrev, _df, raw=True)[0] |
1951 return self._revisiondata(nodeorrev, _df, raw=True) |
1983 |
1952 |
1984 def hash(self, text, p1, p2): |
1953 def hash(self, text, p1, p2): |
1985 """Compute a node hash. |
1954 """Compute a node hash. |
1986 |
1955 |
1987 Available as a function so that subclasses can replace the hash |
1956 Available as a function so that subclasses can replace the hash |
2011 revornode = rev |
1980 revornode = rev |
2012 if revornode is None: |
1981 if revornode is None: |
2013 revornode = templatefilters.short(hex(node)) |
1982 revornode = templatefilters.short(hex(node)) |
2014 raise error.RevlogError( |
1983 raise error.RevlogError( |
2015 _(b"integrity check failed on %s:%s") |
1984 _(b"integrity check failed on %s:%s") |
2016 % (self.indexfile, pycompat.bytestr(revornode)) |
1985 % (self.display_id, pycompat.bytestr(revornode)) |
2017 ) |
1986 ) |
2018 except error.RevlogError: |
1987 except error.RevlogError: |
2019 if self._censorable and storageutil.iscensoredtext(text): |
1988 if self._censorable and storageutil.iscensoredtext(text): |
2020 raise error.CensoredNodeError(self.indexfile, node, text) |
1989 raise error.CensoredNodeError(self.display_id, node, text) |
2021 raise |
1990 raise |
2022 |
1991 |
2023 def _enforceinlinesize(self, tr, fp=None): |
1992 def _enforceinlinesize(self, tr): |
2024 """Check if the revlog is too big for inline and convert if so. |
1993 """Check if the revlog is too big for inline and convert if so. |
2025 |
1994 |
2026 This should be called after revisions are added to the revlog. If the |
1995 This should be called after revisions are added to the revlog. If the |
2027 revlog has grown too large to be an inline revlog, it will convert it |
1996 revlog has grown too large to be an inline revlog, it will convert it |
2028 to use multiple index and data files. |
1997 to use multiple index and data files. |
2029 """ |
1998 """ |
2030 tiprev = len(self) - 1 |
1999 tiprev = len(self) - 1 |
2031 if ( |
2000 total_size = self.start(tiprev) + self.length(tiprev) |
2032 not self._inline |
2001 if not self._inline or total_size < _maxinline: |
2033 or (self.start(tiprev) + self.length(tiprev)) < _maxinline |
|
2034 ): |
|
2035 return |
2002 return |
2036 |
2003 |
2037 troffset = tr.findoffset(self.indexfile) |
2004 troffset = tr.findoffset(self._indexfile) |
2038 if troffset is None: |
2005 if troffset is None: |
2039 raise error.RevlogError( |
2006 raise error.RevlogError( |
2040 _(b"%s not found in the transaction") % self.indexfile |
2007 _(b"%s not found in the transaction") % self._indexfile |
2041 ) |
2008 ) |
2042 trindex = 0 |
2009 trindex = 0 |
2043 tr.add(self.datafile, 0) |
2010 tr.add(self._datafile, 0) |
2044 |
2011 |
2045 if fp: |
2012 existing_handles = False |
|
2013 if self._writinghandles is not None: |
|
2014 existing_handles = True |
|
2015 fp = self._writinghandles[0] |
2046 fp.flush() |
2016 fp.flush() |
2047 fp.close() |
2017 fp.close() |
2048 # We can't use the cached file handle after close(). So prevent |
2018 # We can't use the cached file handle after close(). So prevent |
2049 # its usage. |
2019 # its usage. |
2050 self._writinghandles = None |
2020 self._writinghandles = None |
2051 |
2021 self._segmentfile.writing_handle = None |
2052 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh: |
2022 # No need to deal with sidedata writing handle as it is only |
2053 for r in self: |
2023 # relevant with revlog-v2 which is never inline, not reaching |
2054 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1]) |
2024 # this code |
2055 if troffset <= self.start(r): |
2025 |
2056 trindex = r |
2026 new_dfh = self._datafp(b'w+') |
2057 |
2027 new_dfh.truncate(0) # drop any potentially existing data |
2058 with self._indexfp(b'w') as fp: |
2028 try: |
2059 self.version &= ~FLAG_INLINE_DATA |
2029 with self._indexfp() as read_ifh: |
2060 self._inline = False |
2030 for r in self: |
2061 io = self._io |
2031 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1]) |
2062 for i in self: |
2032 if troffset <= self.start(r) + r * self.index.entry_size: |
2063 e = io.packentry(self.index[i], self.node, self.version, i) |
2033 trindex = r |
2064 fp.write(e) |
2034 new_dfh.flush() |
2065 |
2035 |
2066 # the temp file replace the real index when we exit the context |
2036 with self.__index_new_fp() as fp: |
2067 # manager |
2037 self._format_flags &= ~FLAG_INLINE_DATA |
2068 |
2038 self._inline = False |
2069 tr.replace(self.indexfile, trindex * self.index.entry_size) |
2039 for i in self: |
2070 nodemaputil.setup_persistent_nodemap(tr, self) |
2040 e = self.index.entry_binary(i) |
2071 self._chunkclear() |
2041 if i == 0 and self._docket is None: |
|
2042 header = self._format_flags | self._format_version |
|
2043 header = self.index.pack_header(header) |
|
2044 e = header + e |
|
2045 fp.write(e) |
|
2046 if self._docket is not None: |
|
2047 self._docket.index_end = fp.tell() |
|
2048 |
|
2049 # There is a small transactional race here. If the rename of |
|
2050 # the index fails, we should remove the datafile. It is more |
|
2051 # important to ensure that the data file is not truncated |
|
2052 # when the index is replaced as otherwise data is lost. |
|
2053 tr.replace(self._datafile, self.start(trindex)) |
|
2054 |
|
2055 # the temp file replace the real index when we exit the context |
|
2056 # manager |
|
2057 |
|
2058 tr.replace(self._indexfile, trindex * self.index.entry_size) |
|
2059 nodemaputil.setup_persistent_nodemap(tr, self) |
|
2060 self._segmentfile = randomaccessfile.randomaccessfile( |
|
2061 self.opener, |
|
2062 self._datafile, |
|
2063 self._chunkcachesize, |
|
2064 ) |
|
2065 |
|
2066 if existing_handles: |
|
2067 # switched from inline to conventional reopen the index |
|
2068 ifh = self.__index_write_fp() |
|
2069 self._writinghandles = (ifh, new_dfh, None) |
|
2070 self._segmentfile.writing_handle = new_dfh |
|
2071 new_dfh = None |
|
2072 # No need to deal with sidedata writing handle as it is only |
|
2073 # relevant with revlog-v2 which is never inline, not reaching |
|
2074 # this code |
|
2075 finally: |
|
2076 if new_dfh is not None: |
|
2077 new_dfh.close() |
2072 |
2078 |
2073 def _nodeduplicatecallback(self, transaction, node): |
2079 def _nodeduplicatecallback(self, transaction, node): |
2074 """called when trying to add a node already stored.""" |
2080 """called when trying to add a node already stored.""" |
|
2081 |
|
2082 @contextlib.contextmanager |
|
2083 def reading(self): |
|
2084 """Context manager that keeps data and sidedata files open for reading""" |
|
2085 with self._segmentfile.reading(): |
|
2086 with self._segmentfile_sidedata.reading(): |
|
2087 yield |
|
2088 |
|
2089 @contextlib.contextmanager |
|
2090 def _writing(self, transaction): |
|
2091 if self._trypending: |
|
2092 msg = b'try to write in a `trypending` revlog: %s' |
|
2093 msg %= self.display_id |
|
2094 raise error.ProgrammingError(msg) |
|
2095 if self._writinghandles is not None: |
|
2096 yield |
|
2097 else: |
|
2098 ifh = dfh = sdfh = None |
|
2099 try: |
|
2100 r = len(self) |
|
2101 # opening the data file. |
|
2102 dsize = 0 |
|
2103 if r: |
|
2104 dsize = self.end(r - 1) |
|
2105 dfh = None |
|
2106 if not self._inline: |
|
2107 try: |
|
2108 dfh = self._datafp(b"r+") |
|
2109 if self._docket is None: |
|
2110 dfh.seek(0, os.SEEK_END) |
|
2111 else: |
|
2112 dfh.seek(self._docket.data_end, os.SEEK_SET) |
|
2113 except IOError as inst: |
|
2114 if inst.errno != errno.ENOENT: |
|
2115 raise |
|
2116 dfh = self._datafp(b"w+") |
|
2117 transaction.add(self._datafile, dsize) |
|
2118 if self._sidedatafile is not None: |
|
2119 try: |
|
2120 sdfh = self.opener(self._sidedatafile, mode=b"r+") |
|
2121 dfh.seek(self._docket.sidedata_end, os.SEEK_SET) |
|
2122 except IOError as inst: |
|
2123 if inst.errno != errno.ENOENT: |
|
2124 raise |
|
2125 sdfh = self.opener(self._sidedatafile, mode=b"w+") |
|
2126 transaction.add( |
|
2127 self._sidedatafile, self._docket.sidedata_end |
|
2128 ) |
|
2129 |
|
2130 # opening the index file. |
|
2131 isize = r * self.index.entry_size |
|
2132 ifh = self.__index_write_fp() |
|
2133 if self._inline: |
|
2134 transaction.add(self._indexfile, dsize + isize) |
|
2135 else: |
|
2136 transaction.add(self._indexfile, isize) |
|
2137 # exposing all file handle for writing. |
|
2138 self._writinghandles = (ifh, dfh, sdfh) |
|
2139 self._segmentfile.writing_handle = ifh if self._inline else dfh |
|
2140 self._segmentfile_sidedata.writing_handle = sdfh |
|
2141 yield |
|
2142 if self._docket is not None: |
|
2143 self._write_docket(transaction) |
|
2144 finally: |
|
2145 self._writinghandles = None |
|
2146 self._segmentfile.writing_handle = None |
|
2147 self._segmentfile_sidedata.writing_handle = None |
|
2148 if dfh is not None: |
|
2149 dfh.close() |
|
2150 if sdfh is not None: |
|
2151 sdfh.close() |
|
2152 # closing the index file last to avoid exposing referent to |
|
2153 # potential unflushed data content. |
|
2154 if ifh is not None: |
|
2155 ifh.close() |
|
2156 |
|
2157 def _write_docket(self, transaction): |
|
2158 """write the current docket on disk |
|
2159 |
|
2160 Exist as a method to help changelog to implement transaction logic |
|
2161 |
|
2162 We could also imagine using the same transaction logic for all revlog |
|
2163 since docket are cheap.""" |
|
2164 self._docket.write(transaction) |
2075 |
2165 |
2076 def addrevision( |
2166 def addrevision( |
2077 self, |
2167 self, |
2078 text, |
2168 text, |
2079 transaction, |
2169 transaction, |
2294 |
2362 |
2295 invariants: |
2363 invariants: |
2296 - rawtext is optional (can be None); if not set, cachedelta must be set. |
2364 - rawtext is optional (can be None); if not set, cachedelta must be set. |
2297 if both are set, they must correspond to each other. |
2365 if both are set, they must correspond to each other. |
2298 """ |
2366 """ |
2299 if node == nullid: |
2367 if node == self.nullid: |
2300 raise error.RevlogError( |
2368 raise error.RevlogError( |
2301 _(b"%s: attempt to add null revision") % self.indexfile |
2369 _(b"%s: attempt to add null revision") % self.display_id |
2302 ) |
2370 ) |
2303 if node == wdirid or node in wdirfilenodeids: |
2371 if ( |
|
2372 node == self.nodeconstants.wdirid |
|
2373 or node in self.nodeconstants.wdirfilenodeids |
|
2374 ): |
2304 raise error.RevlogError( |
2375 raise error.RevlogError( |
2305 _(b"%s: attempt to add wdir revision") % self.indexfile |
2376 _(b"%s: attempt to add wdir revision") % self.display_id |
2306 ) |
2377 ) |
|
2378 if self._writinghandles is None: |
|
2379 msg = b'adding revision outside `revlog._writing` context' |
|
2380 raise error.ProgrammingError(msg) |
2307 |
2381 |
2308 if self._inline: |
2382 if self._inline: |
2309 fh = ifh |
2383 fh = self._writinghandles[0] |
2310 else: |
2384 else: |
2311 fh = dfh |
2385 fh = self._writinghandles[1] |
2312 |
2386 |
2313 btext = [rawtext] |
2387 btext = [rawtext] |
2314 |
2388 |
2315 curr = len(self) |
2389 curr = len(self) |
2316 prev = curr - 1 |
2390 prev = curr - 1 |
2317 |
2391 |
2318 offset = self._get_data_offset(prev) |
2392 offset = self._get_data_offset(prev) |
2319 |
2393 |
2320 if self._concurrencychecker: |
2394 if self._concurrencychecker: |
|
2395 ifh, dfh, sdfh = self._writinghandles |
|
2396 # XXX no checking for the sidedata file |
2321 if self._inline: |
2397 if self._inline: |
2322 # offset is "as if" it were in the .d file, so we need to add on |
2398 # offset is "as if" it were in the .d file, so we need to add on |
2323 # the size of the entry metadata. |
2399 # the size of the entry metadata. |
2324 self._concurrencychecker( |
2400 self._concurrencychecker( |
2325 ifh, self.indexfile, offset + curr * self.index.entry_size |
2401 ifh, self._indexfile, offset + curr * self.index.entry_size |
2326 ) |
2402 ) |
2327 else: |
2403 else: |
2328 # Entries in the .i are a consistent size. |
2404 # Entries in the .i are a consistent size. |
2329 self._concurrencychecker( |
2405 self._concurrencychecker( |
2330 ifh, self.indexfile, curr * self.index.entry_size |
2406 ifh, self._indexfile, curr * self.index.entry_size |
2331 ) |
2407 ) |
2332 self._concurrencychecker(dfh, self.datafile, offset) |
2408 self._concurrencychecker(dfh, self._datafile, offset) |
2333 |
2409 |
2334 p1r, p2r = self.rev(p1), self.rev(p2) |
2410 p1r, p2r = self.rev(p1), self.rev(p2) |
2335 |
2411 |
2336 # full versions are inserted when the needed deltas |
2412 # full versions are inserted when the needed deltas |
2337 # become comparable to the uncompressed text |
2413 # become comparable to the uncompressed text |
2346 textlen = len(rawtext) |
2422 textlen = len(rawtext) |
2347 |
2423 |
2348 if deltacomputer is None: |
2424 if deltacomputer is None: |
2349 deltacomputer = deltautil.deltacomputer(self) |
2425 deltacomputer = deltautil.deltacomputer(self) |
2350 |
2426 |
2351 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags) |
2427 revinfo = revlogutils.revisioninfo( |
|
2428 node, |
|
2429 p1, |
|
2430 p2, |
|
2431 btext, |
|
2432 textlen, |
|
2433 cachedelta, |
|
2434 flags, |
|
2435 ) |
2352 |
2436 |
2353 deltainfo = deltacomputer.finddeltainfo(revinfo, fh) |
2437 deltainfo = deltacomputer.finddeltainfo(revinfo, fh) |
2354 |
2438 |
2355 if sidedata: |
2439 compression_mode = COMP_MODE_INLINE |
|
2440 if self._docket is not None: |
|
2441 default_comp = self._docket.default_compression_header |
|
2442 r = deltautil.delta_compression(default_comp, deltainfo) |
|
2443 compression_mode, deltainfo = r |
|
2444 |
|
2445 sidedata_compression_mode = COMP_MODE_INLINE |
|
2446 if sidedata and self.hassidedata: |
|
2447 sidedata_compression_mode = COMP_MODE_PLAIN |
2356 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) |
2448 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) |
2357 sidedata_offset = offset + deltainfo.deltalen |
2449 sidedata_offset = self._docket.sidedata_end |
|
2450 h, comp_sidedata = self.compress(serialized_sidedata) |
|
2451 if ( |
|
2452 h != b'u' |
|
2453 and comp_sidedata[0:1] != b'\0' |
|
2454 and len(comp_sidedata) < len(serialized_sidedata) |
|
2455 ): |
|
2456 assert not h |
|
2457 if ( |
|
2458 comp_sidedata[0:1] |
|
2459 == self._docket.default_compression_header |
|
2460 ): |
|
2461 sidedata_compression_mode = COMP_MODE_DEFAULT |
|
2462 serialized_sidedata = comp_sidedata |
|
2463 else: |
|
2464 sidedata_compression_mode = COMP_MODE_INLINE |
|
2465 serialized_sidedata = comp_sidedata |
2358 else: |
2466 else: |
2359 serialized_sidedata = b"" |
2467 serialized_sidedata = b"" |
2360 # Don't store the offset if the sidedata is empty, that way |
2468 # Don't store the offset if the sidedata is empty, that way |
2361 # we can easily detect empty sidedata and they will be no different |
2469 # we can easily detect empty sidedata and they will be no different |
2362 # than ones we manually add. |
2470 # than ones we manually add. |
2363 sidedata_offset = 0 |
2471 sidedata_offset = 0 |
2364 |
2472 |
2365 e = ( |
2473 e = revlogutils.entry( |
2366 offset_type(offset, flags), |
2474 flags=flags, |
2367 deltainfo.deltalen, |
2475 data_offset=offset, |
2368 textlen, |
2476 data_compressed_length=deltainfo.deltalen, |
2369 deltainfo.base, |
2477 data_uncompressed_length=textlen, |
2370 link, |
2478 data_compression_mode=compression_mode, |
2371 p1r, |
2479 data_delta_base=deltainfo.base, |
2372 p2r, |
2480 link_rev=link, |
2373 node, |
2481 parent_rev_1=p1r, |
2374 sidedata_offset, |
2482 parent_rev_2=p2r, |
2375 len(serialized_sidedata), |
2483 node_id=node, |
|
2484 sidedata_offset=sidedata_offset, |
|
2485 sidedata_compressed_length=len(serialized_sidedata), |
|
2486 sidedata_compression_mode=sidedata_compression_mode, |
2376 ) |
2487 ) |
2377 |
2488 |
2378 if self.version & 0xFFFF != REVLOGV2: |
|
2379 e = e[:8] |
|
2380 |
|
2381 self.index.append(e) |
2489 self.index.append(e) |
2382 entry = self._io.packentry(e, self.node, self.version, curr) |
2490 entry = self.index.entry_binary(curr) |
|
2491 if curr == 0 and self._docket is None: |
|
2492 header = self._format_flags | self._format_version |
|
2493 header = self.index.pack_header(header) |
|
2494 entry = header + entry |
2383 self._writeentry( |
2495 self._writeentry( |
2384 transaction, |
2496 transaction, |
2385 ifh, |
|
2386 dfh, |
|
2387 entry, |
2497 entry, |
2388 deltainfo.data, |
2498 deltainfo.data, |
2389 link, |
2499 link, |
2390 offset, |
2500 offset, |
2391 serialized_sidedata, |
2501 serialized_sidedata, |
|
2502 sidedata_offset, |
2392 ) |
2503 ) |
2393 |
2504 |
2394 rawtext = btext[0] |
2505 rawtext = btext[0] |
2395 |
2506 |
2396 if alwayscache and rawtext is None: |
2507 if alwayscache and rawtext is None: |
2408 end of the data file within a transaction, you can have cases where, for |
2519 end of the data file within a transaction, you can have cases where, for |
2409 example, rev `n` does not have sidedata while rev `n - 1` does, leading |
2520 example, rev `n` does not have sidedata while rev `n - 1` does, leading |
2410 to `n - 1`'s sidedata being written after `n`'s data. |
2521 to `n - 1`'s sidedata being written after `n`'s data. |
2411 |
2522 |
2412 TODO cache this in a docket file before getting out of experimental.""" |
2523 TODO cache this in a docket file before getting out of experimental.""" |
2413 if self.version & 0xFFFF != REVLOGV2: |
2524 if self._docket is None: |
2414 return self.end(prev) |
2525 return self.end(prev) |
2415 |
2526 else: |
2416 offset = 0 |
2527 return self._docket.data_end |
2417 for rev, entry in enumerate(self.index): |
|
2418 sidedata_end = entry[8] + entry[9] |
|
2419 # Sidedata for a previous rev has potentially been written after |
|
2420 # this rev's end, so take the max. |
|
2421 offset = max(self.end(rev), offset, sidedata_end) |
|
2422 return offset |
|
2423 |
2528 |
def _writeentry(
    self, transaction, entry, data, link, offset, sidedata, sidedata_offset
):
    """Write one revision to disk: index entry, data chunk and sidedata.

    ``entry`` is the packed binary index entry, ``data`` the
    (header, payload) pair of the compressed revision chunk, and
    ``sidedata`` the serialized sidedata blob (must be empty for
    inline revlogs).
    """
    # Files opened in a+ mode have inconsistent behavior on various
    # platforms. Windows requires that a file positioning call be made
    # when the file handle transitions between reads and writes. See
    # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
    # platforms there are (presumably) similar mixed-mode hazards, so
    # every handle is positioned explicitly before writing.
    # NOTE(review): a few original comment lines were lost here — confirm
    # against upstream history if the exact rationale matters.
    #
    # We work around this issue by inserting a seek() before writing.
    # Note: This is likely not necessary on Python 3. However, because
    # the file handle is reused for reads and may be seeked there, we need
    # to be careful before changing this.
    if self._writinghandles is None:
        msg = b'adding revision outside `revlog._writing` context'
        raise error.ProgrammingError(msg)
    index_fh, data_fh, sidedata_fh = self._writinghandles
    # Without a docket we append; with one, the docket records the
    # authoritative end offsets to resume writing from.
    if self._docket is None:
        index_fh.seek(0, os.SEEK_END)
    else:
        index_fh.seek(self._docket.index_end, os.SEEK_SET)
    if data_fh:
        if self._docket is None:
            data_fh.seek(0, os.SEEK_END)
        else:
            data_fh.seek(self._docket.data_end, os.SEEK_SET)
    if sidedata_fh:
        sidedata_fh.seek(self._docket.sidedata_end, os.SEEK_SET)

    curr = len(self) - 1
    if not self._inline:
        # Split layout: revision data and sidedata live in their own
        # files; the index holds only fixed-size entries.
        transaction.add(self._datafile, offset)
        if self._sidedatafile:
            transaction.add(self._sidedatafile, sidedata_offset)
        transaction.add(self._indexfile, curr * len(entry))
        if data[0]:
            data_fh.write(data[0])
        data_fh.write(data[1])
        if sidedata:
            sidedata_fh.write(sidedata)
        index_fh.write(entry)
    else:
        # Inline layout: each data chunk follows its entry inside the
        # index file itself; sidedata is not supported inline.
        offset += curr * self.index.entry_size
        transaction.add(self._indexfile, offset)
        index_fh.write(entry)
        index_fh.write(data[0])
        index_fh.write(data[1])
        assert not sidedata
        self._enforceinlinesize(transaction)
    if self._docket is not None:
        # Record the new end offsets so later writes resume correctly.
        self._docket.index_end = index_fh.tell()
        self._docket.data_end = data_fh.tell()
        self._docket.sidedata_end = sidedata_fh.tell()

    nodemaputil.setup_persistent_nodemap(transaction, self)
2463 |
2586 |
2464 def addgroup( |
2587 def addgroup( |
2465 self, |
2588 self, |
2466 deltas, |
2589 deltas, |
2479 |
2602 |
2480 If ``addrevisioncb`` is defined, it will be called with arguments of |
2603 If ``addrevisioncb`` is defined, it will be called with arguments of |
2481 this revlog and the node that was added. |
2604 this revlog and the node that was added. |
2482 """ |
2605 """ |
2483 |
2606 |
2484 if self._writinghandles: |
2607 if self._adding_group: |
2485 raise error.ProgrammingError(b'cannot nest addgroup() calls') |
2608 raise error.ProgrammingError(b'cannot nest addgroup() calls') |
2486 |
2609 |
2487 r = len(self) |
2610 self._adding_group = True |
2488 end = 0 |
|
2489 if r: |
|
2490 end = self.end(r - 1) |
|
2491 ifh = self._indexfp(b"a+") |
|
2492 isize = r * self.index.entry_size |
|
2493 if self._inline: |
|
2494 transaction.add(self.indexfile, end + isize) |
|
2495 dfh = None |
|
2496 else: |
|
2497 transaction.add(self.indexfile, isize) |
|
2498 transaction.add(self.datafile, end) |
|
2499 dfh = self._datafp(b"a+") |
|
2500 |
|
2501 def flush(): |
|
2502 if dfh: |
|
2503 dfh.flush() |
|
2504 ifh.flush() |
|
2505 |
|
2506 self._writinghandles = (ifh, dfh) |
|
2507 empty = True |
2611 empty = True |
2508 |
|
2509 try: |
2612 try: |
2510 deltacomputer = deltautil.deltacomputer(self) |
2613 with self._writing(transaction): |
2511 # loop through our set of deltas |
2614 deltacomputer = deltautil.deltacomputer(self) |
2512 for data in deltas: |
2615 # loop through our set of deltas |
2513 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data |
2616 for data in deltas: |
2514 link = linkmapper(linknode) |
2617 ( |
2515 flags = flags or REVIDX_DEFAULT_FLAGS |
2618 node, |
2516 |
2619 p1, |
2517 rev = self.index.get_rev(node) |
2620 p2, |
2518 if rev is not None: |
2621 linknode, |
2519 # this can happen if two branches make the same change |
2622 deltabase, |
2520 self._nodeduplicatecallback(transaction, rev) |
2623 delta, |
2521 if duplicaterevisioncb: |
2624 flags, |
2522 duplicaterevisioncb(self, rev) |
2625 sidedata, |
|
2626 ) = data |
|
2627 link = linkmapper(linknode) |
|
2628 flags = flags or REVIDX_DEFAULT_FLAGS |
|
2629 |
|
2630 rev = self.index.get_rev(node) |
|
2631 if rev is not None: |
|
2632 # this can happen if two branches make the same change |
|
2633 self._nodeduplicatecallback(transaction, rev) |
|
2634 if duplicaterevisioncb: |
|
2635 duplicaterevisioncb(self, rev) |
|
2636 empty = False |
|
2637 continue |
|
2638 |
|
2639 for p in (p1, p2): |
|
2640 if not self.index.has_node(p): |
|
2641 raise error.LookupError( |
|
2642 p, self.radix, _(b'unknown parent') |
|
2643 ) |
|
2644 |
|
2645 if not self.index.has_node(deltabase): |
|
2646 raise error.LookupError( |
|
2647 deltabase, self.display_id, _(b'unknown delta base') |
|
2648 ) |
|
2649 |
|
2650 baserev = self.rev(deltabase) |
|
2651 |
|
2652 if baserev != nullrev and self.iscensored(baserev): |
|
2653 # if base is censored, delta must be full replacement in a |
|
2654 # single patch operation |
|
2655 hlen = struct.calcsize(b">lll") |
|
2656 oldlen = self.rawsize(baserev) |
|
2657 newlen = len(delta) - hlen |
|
2658 if delta[:hlen] != mdiff.replacediffheader( |
|
2659 oldlen, newlen |
|
2660 ): |
|
2661 raise error.CensoredBaseError( |
|
2662 self.display_id, self.node(baserev) |
|
2663 ) |
|
2664 |
|
2665 if not flags and self._peek_iscensored(baserev, delta): |
|
2666 flags |= REVIDX_ISCENSORED |
|
2667 |
|
2668 # We assume consumers of addrevisioncb will want to retrieve |
|
2669 # the added revision, which will require a call to |
|
2670 # revision(). revision() will fast path if there is a cache |
|
2671 # hit. So, we tell _addrevision() to always cache in this case. |
|
2672 # We're only using addgroup() in the context of changegroup |
|
2673 # generation so the revision data can always be handled as raw |
|
2674 # by the flagprocessor. |
|
2675 rev = self._addrevision( |
|
2676 node, |
|
2677 None, |
|
2678 transaction, |
|
2679 link, |
|
2680 p1, |
|
2681 p2, |
|
2682 flags, |
|
2683 (baserev, delta), |
|
2684 alwayscache=alwayscache, |
|
2685 deltacomputer=deltacomputer, |
|
2686 sidedata=sidedata, |
|
2687 ) |
|
2688 |
|
2689 if addrevisioncb: |
|
2690 addrevisioncb(self, rev) |
2523 empty = False |
2691 empty = False |
2524 continue |
|
2525 |
|
2526 for p in (p1, p2): |
|
2527 if not self.index.has_node(p): |
|
2528 raise error.LookupError( |
|
2529 p, self.indexfile, _(b'unknown parent') |
|
2530 ) |
|
2531 |
|
2532 if not self.index.has_node(deltabase): |
|
2533 raise error.LookupError( |
|
2534 deltabase, self.indexfile, _(b'unknown delta base') |
|
2535 ) |
|
2536 |
|
2537 baserev = self.rev(deltabase) |
|
2538 |
|
2539 if baserev != nullrev and self.iscensored(baserev): |
|
2540 # if base is censored, delta must be full replacement in a |
|
2541 # single patch operation |
|
2542 hlen = struct.calcsize(b">lll") |
|
2543 oldlen = self.rawsize(baserev) |
|
2544 newlen = len(delta) - hlen |
|
2545 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen): |
|
2546 raise error.CensoredBaseError( |
|
2547 self.indexfile, self.node(baserev) |
|
2548 ) |
|
2549 |
|
2550 if not flags and self._peek_iscensored(baserev, delta, flush): |
|
2551 flags |= REVIDX_ISCENSORED |
|
2552 |
|
2553 # We assume consumers of addrevisioncb will want to retrieve |
|
2554 # the added revision, which will require a call to |
|
2555 # revision(). revision() will fast path if there is a cache |
|
2556 # hit. So, we tell _addrevision() to always cache in this case. |
|
2557 # We're only using addgroup() in the context of changegroup |
|
2558 # generation so the revision data can always be handled as raw |
|
2559 # by the flagprocessor. |
|
2560 rev = self._addrevision( |
|
2561 node, |
|
2562 None, |
|
2563 transaction, |
|
2564 link, |
|
2565 p1, |
|
2566 p2, |
|
2567 flags, |
|
2568 (baserev, delta), |
|
2569 ifh, |
|
2570 dfh, |
|
2571 alwayscache=alwayscache, |
|
2572 deltacomputer=deltacomputer, |
|
2573 sidedata=sidedata, |
|
2574 ) |
|
2575 |
|
2576 if addrevisioncb: |
|
2577 addrevisioncb(self, rev) |
|
2578 empty = False |
|
2579 |
|
2580 if not dfh and not self._inline: |
|
2581 # addrevision switched from inline to conventional |
|
2582 # reopen the index |
|
2583 ifh.close() |
|
2584 dfh = self._datafp(b"a+") |
|
2585 ifh = self._indexfp(b"a+") |
|
2586 self._writinghandles = (ifh, dfh) |
|
2587 finally: |
2692 finally: |
2588 self._writinghandles = None |
2693 self._adding_group = False |
2589 |
|
2590 if dfh: |
|
2591 dfh.close() |
|
2592 ifh.close() |
|
2593 return not empty |
2694 return not empty |
2594 |
2695 |
def iscensored(self, rev):
    """Return whether file revision ``rev`` carries the censored flag."""
    censorable = self._censorable
    if not censorable:
        # Censoring is disabled for this revlog; nothing can be censored.
        return False
    return self.flags(rev) & REVIDX_ISCENSORED
2601 |
2702 |
2602 def _peek_iscensored(self, baserev, delta, flush): |
2703 def _peek_iscensored(self, baserev, delta): |
2603 """Quickly check if a delta produces a censored revision.""" |
2704 """Quickly check if a delta produces a censored revision.""" |
2604 if not self._censorable: |
2705 if not self._censorable: |
2605 return False |
2706 return False |
2606 |
2707 |
2607 return storageutil.deltaiscensored(delta, baserev, self.rawsize) |
2708 return storageutil.deltaiscensored(delta, baserev, self.rawsize) |
2640 rev, _ = self.getstrippoint(minlink) |
2741 rev, _ = self.getstrippoint(minlink) |
2641 if rev == len(self): |
2742 if rev == len(self): |
2642 return |
2743 return |
2643 |
2744 |
2644 # first truncate the files on disk |
2745 # first truncate the files on disk |
2645 end = self.start(rev) |
2746 data_end = self.start(rev) |
2646 if not self._inline: |
2747 if not self._inline: |
2647 transaction.add(self.datafile, end) |
2748 transaction.add(self._datafile, data_end) |
2648 end = rev * self.index.entry_size |
2749 end = rev * self.index.entry_size |
2649 else: |
2750 else: |
2650 end += rev * self.index.entry_size |
2751 end = data_end + (rev * self.index.entry_size) |
2651 |
2752 |
2652 transaction.add(self.indexfile, end) |
2753 if self._sidedatafile: |
|
2754 sidedata_end = self.sidedata_cut_off(rev) |
|
2755 transaction.add(self._sidedatafile, sidedata_end) |
|
2756 |
|
2757 transaction.add(self._indexfile, end) |
|
2758 if self._docket is not None: |
|
2759 # XXX we could, leverage the docket while stripping. However it is |
|
2760 # not powerfull enough at the time of this comment |
|
2761 self._docket.index_end = end |
|
2762 self._docket.data_end = data_end |
|
2763 self._docket.sidedata_end = sidedata_end |
|
2764 self._docket.write(transaction, stripping=True) |
2653 |
2765 |
2654 # then reset internal state in memory to forget those revisions |
2766 # then reset internal state in memory to forget those revisions |
2655 self._revisioncache = None |
2767 self._revisioncache = None |
2656 self._chaininfocache = util.lrucachedict(500) |
2768 self._chaininfocache = util.lrucachedict(500) |
2657 self._chunkclear() |
2769 self._segmentfile.clear_cache() |
|
2770 self._segmentfile_sidedata.clear_cache() |
2658 |
2771 |
2659 del self.index[rev:-1] |
2772 del self.index[rev:-1] |
2660 |
2773 |
2661 def checksize(self): |
2774 def checksize(self): |
2662 """Check size of index and data files |
2775 """Check size of index and data files |
2932 if destrevlog._lazydelta: |
3030 if destrevlog._lazydelta: |
2933 dp = self.deltaparent(rev) |
3031 dp = self.deltaparent(rev) |
2934 if dp != nullrev: |
3032 if dp != nullrev: |
2935 cachedelta = (dp, bytes(self._chunk(rev))) |
3033 cachedelta = (dp, bytes(self._chunk(rev))) |
2936 |
3034 |
|
3035 sidedata = None |
2937 if not cachedelta: |
3036 if not cachedelta: |
2938 rawtext = self.rawdata(rev) |
3037 rawtext = self._revisiondata(rev) |
2939 |
3038 sidedata = self.sidedata(rev) |
2940 ifh = destrevlog.opener( |
3039 if sidedata is None: |
2941 destrevlog.indexfile, b'a+', checkambig=False |
3040 sidedata = self.sidedata(rev) |
2942 ) |
3041 |
2943 dfh = None |
3042 if sidedata_helpers is not None: |
2944 if not destrevlog._inline: |
3043 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers( |
2945 dfh = destrevlog.opener(destrevlog.datafile, b'a+') |
3044 self, sidedata_helpers, sidedata, rev |
2946 try: |
3045 ) |
|
3046 flags = flags | new_flags[0] & ~new_flags[1] |
|
3047 |
|
3048 with destrevlog._writing(tr): |
2947 destrevlog._addrevision( |
3049 destrevlog._addrevision( |
2948 node, |
3050 node, |
2949 rawtext, |
3051 rawtext, |
2950 tr, |
3052 tr, |
2951 linkrev, |
3053 linkrev, |
2952 p1, |
3054 p1, |
2953 p2, |
3055 p2, |
2954 flags, |
3056 flags, |
2955 cachedelta, |
3057 cachedelta, |
2956 ifh, |
|
2957 dfh, |
|
2958 deltacomputer=deltacomputer, |
3058 deltacomputer=deltacomputer, |
|
3059 sidedata=sidedata, |
2959 ) |
3060 ) |
2960 finally: |
|
2961 if dfh: |
|
2962 dfh.close() |
|
2963 ifh.close() |
|
2964 |
3061 |
2965 if addrevisioncb: |
3062 if addrevisioncb: |
2966 addrevisioncb(self, rev, node) |
3063 addrevisioncb(self, rev, node) |
2967 |
3064 |
def censorrevision(self, tr, censornode, tombstone=b''):
    """Censor the revision identified by ``censornode``.

    Dispatches to the format-specific censoring strategy in the
    ``rewrite`` module; v0 revlogs cannot be censored at all.
    """
    version = self._format_version
    if version == REVLOGV0:
        raise error.RevlogError(
            _(b'cannot censor with version %d revlogs') % version
        )
    if version == REVLOGV1:
        rewrite.v1_censor(self, tr, censornode, tombstone)
    else:
        rewrite.v2_censor(self, tr, censornode, tombstone)
3051 def verifyintegrity(self, state): |
3076 def verifyintegrity(self, state): |
3052 """Verifies the integrity of the revlog. |
3077 """Verifies the integrity of the revlog. |
3053 |
3078 |
3054 Yields ``revlogproblem`` instances describing problems that are |
3079 Yields ``revlogproblem`` instances describing problems that are |
3182 self.opener.stat(path).st_size for path in self.files() |
3207 self.opener.stat(path).st_size for path in self.files() |
3183 ) |
3208 ) |
3184 |
3209 |
3185 return d |
3210 return d |
3186 |
3211 |
3187 def rewrite_sidedata(self, helpers, startrev, endrev): |
3212 def rewrite_sidedata(self, transaction, helpers, startrev, endrev): |
3188 if self.version & 0xFFFF != REVLOGV2: |
3213 if not self.hassidedata: |
3189 return |
3214 return |
3190 # inline are not yet supported because they suffer from an issue when |
3215 # revlog formats with sidedata support does not support inline |
3191 # rewriting them (since it's not an append-only operation). |
|
3192 # See issue6485. |
|
3193 assert not self._inline |
3216 assert not self._inline |
3194 if not helpers[1] and not helpers[2]: |
3217 if not helpers[1] and not helpers[2]: |
3195 # Nothing to generate or remove |
3218 # Nothing to generate or remove |
3196 return |
3219 return |
3197 |
3220 |
3198 new_entries = [] |
3221 new_entries = [] |
3199 # append the new sidedata |
3222 # append the new sidedata |
3200 with self._datafp(b'a+') as fp: |
3223 with self._writing(transaction): |
3201 # Maybe this bug still exists, see revlog._writeentry |
3224 ifh, dfh, sdfh = self._writinghandles |
3202 fp.seek(0, os.SEEK_END) |
3225 dfh.seek(self._docket.sidedata_end, os.SEEK_SET) |
3203 current_offset = fp.tell() |
3226 |
|
3227 current_offset = sdfh.tell() |
3204 for rev in range(startrev, endrev + 1): |
3228 for rev in range(startrev, endrev + 1): |
3205 entry = self.index[rev] |
3229 entry = self.index[rev] |
3206 new_sidedata = storageutil.run_sidedata_helpers( |
3230 new_sidedata, flags = sidedatautil.run_sidedata_helpers( |
3207 store=self, |
3231 store=self, |
3208 sidedata_helpers=helpers, |
3232 sidedata_helpers=helpers, |
3209 sidedata={}, |
3233 sidedata={}, |
3210 rev=rev, |
3234 rev=rev, |
3211 ) |
3235 ) |
3212 |
3236 |
3213 serialized_sidedata = sidedatautil.serialize_sidedata( |
3237 serialized_sidedata = sidedatautil.serialize_sidedata( |
3214 new_sidedata |
3238 new_sidedata |
3215 ) |
3239 ) |
|
3240 |
|
3241 sidedata_compression_mode = COMP_MODE_INLINE |
|
3242 if serialized_sidedata and self.hassidedata: |
|
3243 sidedata_compression_mode = COMP_MODE_PLAIN |
|
3244 h, comp_sidedata = self.compress(serialized_sidedata) |
|
3245 if ( |
|
3246 h != b'u' |
|
3247 and comp_sidedata[0] != b'\0' |
|
3248 and len(comp_sidedata) < len(serialized_sidedata) |
|
3249 ): |
|
3250 assert not h |
|
3251 if ( |
|
3252 comp_sidedata[0] |
|
3253 == self._docket.default_compression_header |
|
3254 ): |
|
3255 sidedata_compression_mode = COMP_MODE_DEFAULT |
|
3256 serialized_sidedata = comp_sidedata |
|
3257 else: |
|
3258 sidedata_compression_mode = COMP_MODE_INLINE |
|
3259 serialized_sidedata = comp_sidedata |
3216 if entry[8] != 0 or entry[9] != 0: |
3260 if entry[8] != 0 or entry[9] != 0: |
3217 # rewriting entries that already have sidedata is not |
3261 # rewriting entries that already have sidedata is not |
3218 # supported yet, because it introduces garbage data in the |
3262 # supported yet, because it introduces garbage data in the |
3219 # revlog. |
3263 # revlog. |
3220 msg = b"Rewriting existing sidedata is not supported yet" |
3264 msg = b"rewriting existing sidedata is not supported yet" |
3221 raise error.Abort(msg) |
3265 raise error.Abort(msg) |
3222 entry = entry[:8] |
3266 |
3223 entry += (current_offset, len(serialized_sidedata)) |
3267 # Apply (potential) flags to add and to remove after running |
3224 |
3268 # the sidedata helpers |
3225 fp.write(serialized_sidedata) |
3269 new_offset_flags = entry[0] | flags[0] & ~flags[1] |
3226 new_entries.append(entry) |
3270 entry_update = ( |
|
3271 current_offset, |
|
3272 len(serialized_sidedata), |
|
3273 new_offset_flags, |
|
3274 sidedata_compression_mode, |
|
3275 ) |
|
3276 |
|
3277 # the sidedata computation might have move the file cursors around |
|
3278 sdfh.seek(current_offset, os.SEEK_SET) |
|
3279 sdfh.write(serialized_sidedata) |
|
3280 new_entries.append(entry_update) |
3227 current_offset += len(serialized_sidedata) |
3281 current_offset += len(serialized_sidedata) |
3228 |
3282 self._docket.sidedata_end = sdfh.tell() |
3229 # rewrite the new index entries |
3283 |
3230 with self._indexfp(b'w+') as fp: |
3284 # rewrite the new index entries |
3231 fp.seek(startrev * self.index.entry_size) |
3285 ifh.seek(startrev * self.index.entry_size) |
3232 for i, entry in enumerate(new_entries): |
3286 for i, e in enumerate(new_entries): |
3233 rev = startrev + i |
3287 rev = startrev + i |
3234 self.index.replace_sidedata_info(rev, entry[8], entry[9]) |
3288 self.index.replace_sidedata_info(rev, *e) |
3235 packed = self._io.packentry(entry, self.node, self.version, rev) |
3289 packed = self.index.entry_binary(rev) |
3236 fp.write(packed) |
3290 if rev == 0 and self._docket is None: |
|
3291 header = self._format_flags | self._format_version |
|
3292 header = self.index.pack_header(header) |
|
3293 packed = header + packed |
|
3294 ifh.write(packed) |