diff -r 29ea3b4c4f62 -r d7515d29761d mercurial/revlog.py --- a/mercurial/revlog.py Fri Jul 09 00:25:14 2021 +0530 +++ b/mercurial/revlog.py Wed Jul 21 22:52:09 2021 +0200 @@ -1,4 +1,5 @@ # revlog.py - storage back-end for mercurial +# coding: utf8 # # Copyright 2005-2007 Olivia Mackall # @@ -26,25 +27,24 @@ from .node import ( bin, hex, - nullhex, - nullid, nullrev, sha1nodeconstants, short, - wdirfilenodeids, - wdirhex, - wdirid, wdirrev, ) from .i18n import _ from .pycompat import getattr from .revlogutils.constants import ( + ALL_KINDS, + CHANGELOGV2, + COMP_MODE_DEFAULT, + COMP_MODE_INLINE, + COMP_MODE_PLAIN, + FEATURES_BY_VERSION, FLAG_GENERALDELTA, FLAG_INLINE_DATA, - INDEX_ENTRY_V0, - INDEX_ENTRY_V1, - INDEX_ENTRY_V2, INDEX_HEADER, + KIND_CHANGELOG, REVLOGV0, REVLOGV1, REVLOGV1_FLAGS, @@ -53,6 +53,7 @@ REVLOG_DEFAULT_FLAGS, REVLOG_DEFAULT_FORMAT, REVLOG_DEFAULT_VERSION, + SUPPORTED_FLAGS, ) from .revlogutils.flagutil import ( REVIDX_DEFAULT_FLAGS, @@ -62,7 +63,6 @@ REVIDX_HASCOPIESINFO, REVIDX_ISCENSORED, REVIDX_RAWTEXT_CHANGING_FLAGS, - REVIDX_SIDEDATA, ) from .thirdparty import attr from . import ( @@ -72,6 +72,7 @@ mdiff, policy, pycompat, + revlogutils, templatefilters, util, ) @@ -81,8 +82,12 @@ ) from .revlogutils import ( deltas as deltautil, + docket as docketutil, flagutil, nodemap as nodemaputil, + randomaccessfile, + revlogv0, + rewrite, sidedata as sidedatautil, ) from .utils import ( @@ -92,6 +97,7 @@ # blanked usage of all the name to prevent pyflakes constraints # We need these name available in the module for extensions. + REVLOGV0 REVLOGV1 REVLOGV2 @@ -104,7 +110,6 @@ REVLOGV2_FLAGS REVIDX_ISCENSORED REVIDX_ELLIPSIS -REVIDX_SIDEDATA REVIDX_HASCOPIESINFO REVIDX_EXTSTORED REVIDX_DEFAULT_FLAGS @@ -121,7 +126,6 @@ # max size of revlog with inline data _maxinline = 131072 -_chunksize = 1048576 # Flag processors for REVIDX_ELLIPSIS. def ellipsisreadprocessor(rl, text): @@ -143,20 +147,6 @@ ) -def getoffset(q): - return int(q >> 16) - - -def gettype(q): - return int(q & 0xFFFF) - - -def offset_type(offset, type): - if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0: - raise ValueError(b'unknown revlog index flags') - return int(int(offset) << 16 | type) - - def _verify_revision(rl, skipflags, state, node): """Verify the integrity of the given revlog ``node`` while providing a hook point for extensions to influence the operation.""" @@ -177,27 +167,6 @@ ) -@attr.s(slots=True, frozen=True) -class _revisioninfo(object): - """Information about a revision that allows building its fulltext - node: expected hash of the revision - p1, p2: parent revs of the revision - btext: built text cache consisting of a one-element list - cachedelta: (baserev, uncompressed_delta) or None - flags: flags associated to the revision storage - - One of btext[0] or cachedelta must be set. - """ - - node = attr.ib() - p1 = attr.ib() - p2 = attr.ib() - btext = attr.ib() - textlen = attr.ib() - cachedelta = attr.ib() - flags = attr.ib() - - @interfaceutil.implementer(repository.irevisiondelta) @attr.s(slots=True) class revlogrevisiondelta(object): @@ -210,6 +179,7 @@ revision = attr.ib() delta = attr.ib() sidedata = attr.ib() + protocol_flags = attr.ib() linknode = attr.ib(default=None) @@ -221,161 +191,51 @@ node = attr.ib(default=None) -class revlogoldindex(list): - entry_size = INDEX_ENTRY_V0.size - - @property - def nodemap(self): - msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]" - util.nouideprecwarn(msg, b'5.3', stacklevel=2) - return self._nodemap - - @util.propertycache - def _nodemap(self): - nodemap = nodemaputil.NodeMap({nullid: nullrev}) - for r in range(0, len(self)): - n = self[r][7] - nodemap[n] = r - return nodemap - - def has_node(self, node): - """return True if the node exist in the index""" - return node in self._nodemap - - def rev(self, node): - """return a revision for a node - - If the node is unknown, raise a RevlogError""" - return self._nodemap[node] - - def get_rev(self, node): - """return a revision for a node - - If the node is unknown, return None""" - return self._nodemap.get(node) - - def append(self, tup): - self._nodemap[tup[7]] = len(self) - super(revlogoldindex, self).append(tup) - - def __delitem__(self, i): - if not isinstance(i, slice) or not i.stop == -1 or i.step is not None: - raise ValueError(b"deleting slices only supports a:-1 with step 1") - for r in pycompat.xrange(i.start, len(self)): - del self._nodemap[self[r][7]] - super(revlogoldindex, self).__delitem__(i) - - def clearcaches(self): - self.__dict__.pop('_nodemap', None) - - def __getitem__(self, i): - if i == -1: - return (0, 0, 0, -1, -1, -1, -1, nullid) - return list.__getitem__(self, i) - - -class revlogoldio(object): - def parseindex(self, data, inline): - s = INDEX_ENTRY_V0.size - index = [] - nodemap = nodemaputil.NodeMap({nullid: nullrev}) - n = off = 0 - l = len(data) - while off + s <= l: - cur = data[off : off + s] - off += s - e = INDEX_ENTRY_V0.unpack(cur) - # transform to revlogv1 format - e2 = ( - offset_type(e[0], 0), - e[1], - -1, - e[2], - e[3], - nodemap.get(e[4], nullrev), - nodemap.get(e[5], nullrev), - e[6], - ) - index.append(e2) - nodemap[e[6]] = n - n += 1 - - index = revlogoldindex(index) - return index, None - - def packentry(self, entry, node, version, rev): - """return the binary representation of an entry - - entry: a tuple containing all the values (see index.__getitem__) - node: a callback to convert a revision to nodeid - version: the changelog version - rev: the revision number - """ - if gettype(entry[0]): - raise error.RevlogError( - _(b'index entry flags need revlog version 1') - ) - e2 = ( - getoffset(entry[0]), - entry[1], - entry[3], - entry[4], - node(entry[5]), - node(entry[6]), - entry[7], - ) - return INDEX_ENTRY_V0.pack(*e2) +def parse_index_v1(data, inline): + # call the C implementation to parse the index data + index, cache = parsers.parse_index2(data, inline) + return index, cache + + +def parse_index_v2(data, inline): + # call the C implementation to parse the index data + index, cache = parsers.parse_index2(data, inline, revlogv2=True) + return index, cache + + +def parse_index_cl_v2(data, inline): + # call the C implementation to parse the index data + assert not inline + from .pure.parsers import parse_index_cl_v2 + + index, cache = parse_index_cl_v2(data) + return index, cache + + +if util.safehasattr(parsers, 'parse_index_devel_nodemap'): + + def parse_index_v1_nodemap(data, inline): + index, cache = parsers.parse_index_devel_nodemap(data, inline) + return index, cache + + +else: + parse_index_v1_nodemap = None + + +def parse_index_v1_mixed(data, inline): + index, cache = parse_index_v1(data, inline) + return rustrevlog.MixedIndex(index), cache # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte # signed integer) _maxentrysize = 0x7FFFFFFF - -class revlogio(object): - def parseindex(self, data, inline): - # call the C implementation to parse the index data - index, cache = parsers.parse_index2(data, inline) - return index, cache - - def packentry(self, entry, node, version, rev): - p = INDEX_ENTRY_V1.pack(*entry) - if rev == 0: - p = INDEX_HEADER.pack(version) + p[4:] - return p - - -class revlogv2io(object): - def parseindex(self, data, inline): - index, cache = parsers.parse_index2(data, inline, revlogv2=True) - return index, cache - - def packentry(self, entry, node, version, rev): - p = INDEX_ENTRY_V2.pack(*entry) - if rev == 0: - p = INDEX_HEADER.pack(version) + p[4:] - return p - - -NodemapRevlogIO = None - -if util.safehasattr(parsers, 'parse_index_devel_nodemap'): - - class NodemapRevlogIO(revlogio): - """A debug oriented IO class that return a PersistentNodeMapIndexObject - - The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature. - """ - - def parseindex(self, data, inline): - index, cache = parsers.parse_index_devel_nodemap(data, inline) - return index, cache - - -class rustrevlogio(revlogio): - def parseindex(self, data, inline): - index, cache = super(rustrevlogio, self).parseindex(data, inline) - return rustrevlog.MixedIndex(index), cache +FILE_TOO_SHORT_MSG = _( + b'cannot read from revlog %s;' + b' expected %d bytes from offset %d, data size is %d' +) class revlog(object): @@ -419,6 +279,9 @@ file handle, a filename, and an expected position. It should check whether the current position in the file handle is valid, and log/warn/fail (by raising). + + See mercurial/revlogutils/contants.py for details about the content of an + index entry. """ _flagserrorclass = error.RevlogError @@ -426,14 +289,16 @@ def __init__( self, opener, - indexfile, - datafile=None, + target, + radix, + postfix=None, # only exist for `tmpcensored` now checkambig=False, mmaplargeindex=False, censorable=False, upperboundcomp=None, persistentnodemap=False, concurrencychecker=None, + trypending=False, ): """ create a revlog object @@ -441,17 +306,31 @@ opener is a function that abstracts the file opening operation and can be used to implement COW semantics or the like. + `target`: a (KIND, ID) tuple that identify the content stored in + this revlog. It help the rest of the code to understand what the revlog + is about without having to resort to heuristic and index filename + analysis. Note: that this must be reliably be set by normal code, but + that test, debug, or performance measurement code might not set this to + accurate value. """ self.upperboundcomp = upperboundcomp - self.indexfile = indexfile - self.datafile = datafile or (indexfile[:-2] + b".d") - self.nodemap_file = None + + self.radix = radix + + self._docket_file = None + self._indexfile = None + self._datafile = None + self._sidedatafile = None + self._nodemap_file = None + self.postfix = postfix + self._trypending = trypending + self.opener = opener if persistentnodemap: - self.nodemap_file = nodemaputil.get_nodemap_file( - opener, self.indexfile - ) - - self.opener = opener + self._nodemap_file = nodemaputil.get_nodemap_file(self) + + assert target[0] in ALL_KINDS + assert len(target) == 2 + self.target = target # When True, indexfile is opened with checkambig=True at writing, to # avoid file stat ambiguity. self._checkambig = checkambig @@ -468,6 +347,7 @@ self._maxchainlen = None self._deltabothparents = True self.index = None + self._docket = None self._nodemap_docket = None # Mapping of partial identifiers to full nodes. self._pcache = {} @@ -477,6 +357,7 @@ self._maxdeltachainspan = -1 self._withsparseread = False self._sparserevlog = False + self.hassidedata = False self._srdensitythreshold = 0.50 self._srmingapsize = 262144 @@ -484,27 +365,46 @@ # custom flags. self._flagprocessors = dict(flagutil.flagprocessors) - # 2-tuple of file handles being used for active writing. + # 3-tuple of file handles being used for active writing. self._writinghandles = None + # prevent nesting of addgroup + self._adding_group = None self._loadindex() self._concurrencychecker = concurrencychecker - def _loadindex(self): + def _init_opts(self): + """process options (from above/config) to setup associated default revlog mode + + These values might be affected when actually reading on disk information. + + The relevant values are returned for use in _loadindex(). + + * newversionflags: + version header to use if we need to create a new revlog + + * mmapindexthreshold: + minimal index size for start to use mmap + + * force_nodemap: + force the usage of a "development" version of the nodemap code + """ mmapindexthreshold = None opts = self.opener.options - if b'revlogv2' in opts: - newversionflags = REVLOGV2 | FLAG_INLINE_DATA + if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG: + new_header = CHANGELOGV2 + elif b'revlogv2' in opts: + new_header = REVLOGV2 elif b'revlogv1' in opts: - newversionflags = REVLOGV1 | FLAG_INLINE_DATA + new_header = REVLOGV1 | FLAG_INLINE_DATA if b'generaldelta' in opts: - newversionflags |= FLAG_GENERALDELTA + new_header |= FLAG_GENERALDELTA elif b'revlogv0' in self.opener.options: - newversionflags = REVLOGV0 + new_header = REVLOGV0 else: - newversionflags = REVLOG_DEFAULT_VERSION + new_header = REVLOG_DEFAULT_VERSION if b'chunkcachesize' in opts: self._chunkcachesize = opts[b'chunkcachesize'] @@ -526,7 +426,6 @@ self._maxdeltachainspan = opts[b'maxdeltachainspan'] if self._mmaplargeindex and b'mmapindexthreshold' in opts: mmapindexthreshold = opts[b'mmapindexthreshold'] - self.hassidedata = bool(opts.get(b'side-data', False)) self._sparserevlog = bool(opts.get(b'sparse-revlog', False)) withsparseread = bool(opts.get(b'with-sparse-read', False)) # sparse-revlog forces sparse-read @@ -554,75 +453,118 @@ _(b'revlog chunk cache size %r is not a power of 2') % self._chunkcachesize ) - - indexdata = b'' - self._initempty = True + force_nodemap = opts.get(b'devel-force-nodemap', False) + return new_header, mmapindexthreshold, force_nodemap + + def _get_data(self, filepath, mmap_threshold, size=None): + """return a file content with or without mmap + + If the file is missing return the empty string""" try: - with self._indexfp() as f: - if ( - mmapindexthreshold is not None - and self.opener.fstat(f).st_size >= mmapindexthreshold - ): - # TODO: should .close() to release resources without - # relying on Python GC - indexdata = util.buffer(util.mmapread(f)) + with self.opener(filepath) as fp: + if mmap_threshold is not None: + file_size = self.opener.fstat(fp).st_size + if file_size >= mmap_threshold: + if size is not None: + # avoid potentiel mmap crash + size = min(file_size, size) + # TODO: should .close() to release resources without + # relying on Python GC + if size is None: + return util.buffer(util.mmapread(fp)) + else: + return util.buffer(util.mmapread(fp, size)) + if size is None: + return fp.read() else: - indexdata = f.read() - if len(indexdata) > 0: - versionflags = INDEX_HEADER.unpack(indexdata[:4])[0] - self._initempty = False - else: - versionflags = newversionflags + return fp.read(size) except IOError as inst: if inst.errno != errno.ENOENT: raise - - versionflags = newversionflags - - self.version = versionflags - - flags = versionflags & ~0xFFFF - fmt = versionflags & 0xFFFF - - if fmt == REVLOGV0: - if flags: - raise error.RevlogError( - _(b'unknown flags (%#04x) in version %d revlog %s') - % (flags >> 16, fmt, self.indexfile) + return b'' + + def _loadindex(self, docket=None): + + new_header, mmapindexthreshold, force_nodemap = self._init_opts() + + if self.postfix is not None: + entry_point = b'%s.i.%s' % (self.radix, self.postfix) + elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix): + entry_point = b'%s.i.a' % self.radix + else: + entry_point = b'%s.i' % self.radix + + if docket is not None: + self._docket = docket + self._docket_file = entry_point + else: + entry_data = b'' + self._initempty = True + entry_data = self._get_data(entry_point, mmapindexthreshold) + if len(entry_data) > 0: + header = INDEX_HEADER.unpack(entry_data[:4])[0] + self._initempty = False + else: + header = new_header + + self._format_flags = header & ~0xFFFF + self._format_version = header & 0xFFFF + + supported_flags = SUPPORTED_FLAGS.get(self._format_version) + if supported_flags is None: + msg = _(b'unknown version (%d) in revlog %s') + msg %= (self._format_version, self.display_id) + raise error.RevlogError(msg) + elif self._format_flags & ~supported_flags: + msg = _(b'unknown flags (%#04x) in version %d revlog %s') + display_flag = self._format_flags >> 16 + msg %= (display_flag, self._format_version, self.display_id) + raise error.RevlogError(msg) + + features = FEATURES_BY_VERSION[self._format_version] + self._inline = features[b'inline'](self._format_flags) + self._generaldelta = features[b'generaldelta'](self._format_flags) + self.hassidedata = features[b'sidedata'] + + if not features[b'docket']: + self._indexfile = entry_point + index_data = entry_data + else: + self._docket_file = entry_point + if self._initempty: + self._docket = docketutil.default_docket(self, header) + else: + self._docket = docketutil.parse_docket( + self, entry_data, use_pending=self._trypending + ) + + if self._docket is not None: + self._indexfile = self._docket.index_filepath() + index_data = b'' + index_size = self._docket.index_end + if index_size > 0: + index_data = self._get_data( + self._indexfile, mmapindexthreshold, size=index_size ) - - self._inline = False - self._generaldelta = False - - elif fmt == REVLOGV1: - if flags & ~REVLOGV1_FLAGS: - raise error.RevlogError( - _(b'unknown flags (%#04x) in version %d revlog %s') - % (flags >> 16, fmt, self.indexfile) - ) - - self._inline = versionflags & FLAG_INLINE_DATA - self._generaldelta = versionflags & FLAG_GENERALDELTA - - elif fmt == REVLOGV2: - if flags & ~REVLOGV2_FLAGS: - raise error.RevlogError( - _(b'unknown flags (%#04x) in version %d revlog %s') - % (flags >> 16, fmt, self.indexfile) - ) - - # There is a bug in the transaction handling when going from an - # inline revlog to a separate index and data file. Turn it off until - # it's fixed, since v2 revlogs sometimes get rewritten on exchange. - # See issue6485 + if len(index_data) < index_size: + msg = _(b'too few index data for %s: got %d, expected %d') + msg %= (self.display_id, len(index_data), index_size) + raise error.RevlogError(msg) + self._inline = False # generaldelta implied by version 2 revlogs. self._generaldelta = True - + # the logic for persistent nodemap will be dealt with within the + # main docket, so disable it for now. + self._nodemap_file = None + + if self._docket is not None: + self._datafile = self._docket.data_filepath() + self._sidedatafile = self._docket.sidedata_filepath() + elif self.postfix is None: + self._datafile = b'%s.d' % self.radix else: - raise error.RevlogError( - _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile) - ) + self._datafile = b'%s.d.%s' % (self.radix, self.postfix) self.nodeconstants = sha1nodeconstants self.nullid = self.nodeconstants.nullid @@ -634,33 +576,35 @@ self._storedeltachains = True devel_nodemap = ( - self.nodemap_file - and opts.get(b'devel-force-nodemap', False) - and NodemapRevlogIO is not None + self._nodemap_file + and force_nodemap + and parse_index_v1_nodemap is not None ) use_rust_index = False if rustrevlog is not None: - if self.nodemap_file is not None: + if self._nodemap_file is not None: use_rust_index = True else: use_rust_index = self.opener.options.get(b'rust.index') - self._io = revlogio() - if self.version == REVLOGV0: - self._io = revlogoldio() - elif fmt == REVLOGV2: - self._io = revlogv2io() + self._parse_index = parse_index_v1 + if self._format_version == REVLOGV0: + self._parse_index = revlogv0.parse_index_v0 + elif self._format_version == REVLOGV2: + self._parse_index = parse_index_v2 + elif self._format_version == CHANGELOGV2: + self._parse_index = parse_index_cl_v2 elif devel_nodemap: - self._io = NodemapRevlogIO() + self._parse_index = parse_index_v1_nodemap elif use_rust_index: - self._io = rustrevlogio() + self._parse_index = parse_index_v1_mixed try: - d = self._io.parseindex(indexdata, self._inline) - index, _chunkcache = d + d = self._parse_index(index_data, self._inline) + index, chunkcache = d use_nodemap = ( not self._inline - and self.nodemap_file is not None + and self._nodemap_file is not None and util.safehasattr(index, 'update_nodemap_data') ) if use_nodemap: @@ -676,58 +620,106 @@ index.update_nodemap_data(*nodemap_data) except (ValueError, IndexError): raise error.RevlogError( - _(b"index %s is corrupted") % self.indexfile + _(b"index %s is corrupted") % self.display_id ) - self.index, self._chunkcache = d - if not self._chunkcache: - self._chunkclear() + self.index = index + self._segmentfile = randomaccessfile.randomaccessfile( + self.opener, + (self._indexfile if self._inline else self._datafile), + self._chunkcachesize, + chunkcache, + ) + self._segmentfile_sidedata = randomaccessfile.randomaccessfile( + self.opener, + self._sidedatafile, + self._chunkcachesize, + ) # revnum -> (chain-length, sum-delta-length) self._chaininfocache = util.lrucachedict(500) # revlog header -> revlog compressor self._decompressors = {} @util.propertycache + def revlog_kind(self): + return self.target[0] + + @util.propertycache + def display_id(self): + """The public facing "ID" of the revlog that we use in message""" + # Maybe we should build a user facing representation of + # revlog.target instead of using `self.radix` + return self.radix + + def _get_decompressor(self, t): + try: + compressor = self._decompressors[t] + except KeyError: + try: + engine = util.compengines.forrevlogheader(t) + compressor = engine.revlogcompressor(self._compengineopts) + self._decompressors[t] = compressor + except KeyError: + raise error.RevlogError( + _(b'unknown compression type %s') % binascii.hexlify(t) + ) + return compressor + + @util.propertycache def _compressor(self): engine = util.compengines[self._compengine] return engine.revlogcompressor(self._compengineopts) - def _indexfp(self, mode=b'r'): + @util.propertycache + def _decompressor(self): + """the default decompressor""" + if self._docket is None: + return None + t = self._docket.default_compression_header + c = self._get_decompressor(t) + return c.decompress + + def _indexfp(self): """file object for the revlog's index file""" - args = {'mode': mode} - if mode != b'r': - args['checkambig'] = self._checkambig - if mode == b'w': - args['atomictemp'] = True - return self.opener(self.indexfile, **args) + return self.opener(self._indexfile, mode=b"r") + + def __index_write_fp(self): + # You should not use this directly and use `_writing` instead + try: + f = self.opener( + self._indexfile, mode=b"r+", checkambig=self._checkambig + ) + if self._docket is None: + f.seek(0, os.SEEK_END) + else: + f.seek(self._docket.index_end, os.SEEK_SET) + return f + except IOError as inst: + if inst.errno != errno.ENOENT: + raise + return self.opener( + self._indexfile, mode=b"w+", checkambig=self._checkambig + ) + + def __index_new_fp(self): + # You should not use this unless you are upgrading from inline revlog + return self.opener( + self._indexfile, + mode=b"w", + checkambig=self._checkambig, + atomictemp=True, + ) def _datafp(self, mode=b'r'): """file object for the revlog's data file""" - return self.opener(self.datafile, mode=mode) + return self.opener(self._datafile, mode=mode) @contextlib.contextmanager - def _datareadfp(self, existingfp=None): - """file object suitable to read data""" - # Use explicit file handle, if given. - if existingfp is not None: - yield existingfp - - # Use a file handle being actively used for writes, if available. - # There is some danger to doing this because reads will seek the - # file. However, _writeentry() performs a SEEK_END before all writes, - # so we should be safe. - elif self._writinghandles: - if self._inline: - yield self._writinghandles[0] - else: - yield self._writinghandles[1] - - # Otherwise open a new file handle. + def _sidedatareadfp(self): + """file object suitable to read sidedata""" + if self._writinghandles: + yield self._writinghandles[2] else: - if self._inline: - func = self._indexfp - else: - func = self._datafp - with func() as fp: + with self.opener(self._sidedatafile) as fp: yield fp def tiprev(self): @@ -785,7 +777,7 @@ return True def update_caches(self, transaction): - if self.nodemap_file is not None: + if self._nodemap_file is not None: if transaction is None: nodemaputil.update_persistent_nodemap(self) else: @@ -794,7 +786,8 @@ def clearcaches(self): self._revisioncache = None self._chainbasecache.clear() - self._chunkcache = (0, b'') + self._segmentfile.clear_cache() + self._segmentfile_sidedata.clear_cache() self._pcache = {} self._nodemap_docket = None self.index.clearcaches() @@ -802,7 +795,7 @@ # end up having to refresh it here. use_nodemap = ( not self._inline - and self.nodemap_file is not None + and self._nodemap_file is not None and util.safehasattr(self.index, 'update_nodemap_data') ) if use_nodemap: @@ -818,9 +811,12 @@ raise except error.RevlogError: # parsers.c radix tree lookup failed - if node == wdirid or node in wdirfilenodeids: + if ( + node == self.nodeconstants.wdirid + or node in self.nodeconstants.wdirfilenodeids + ): raise error.WdirUnsupported - raise error.LookupError(node, self.indexfile, _(b'no node')) + raise error.LookupError(node, self.display_id, _(b'no node')) # Accessors for index entries. @@ -829,6 +825,23 @@ def start(self, rev): return int(self.index[rev][0] >> 16) + def sidedata_cut_off(self, rev): + sd_cut_off = self.index[rev][8] + if sd_cut_off != 0: + return sd_cut_off + # This is some annoying dance, because entries without sidedata + # currently use 0 as their ofsset. (instead of previous-offset + + # previous-size) + # + # We should reconsider this sidedata → 0 sidata_offset policy. + # In the meantime, we need this. + while 0 <= rev: + e = self.index[rev] + if e[9] != 0: + return e[8] + e[9] + rev -= 1 + return 0 + def flags(self, rev): return self.index[rev][0] & 0xFFFF @@ -836,7 +849,7 @@ return self.index[rev][1] def sidedata_length(self, rev): - if self.version & 0xFFFF != REVLOGV2: + if not self.hassidedata: return 0 return self.index[rev][9] @@ -996,7 +1009,7 @@ checkrev(r) # and we're sure ancestors aren't filtered as well - if rustancestor is not None: + if rustancestor is not None and self.index.rust_ext_compat: lazyancestors = rustancestor.LazyAncestors arg = self.index else: @@ -1021,7 +1034,7 @@ not supplied, uses all of the revlog's heads. If common is not supplied, uses nullid.""" if common is None: - common = [nullid] + common = [self.nullid] if heads is None: heads = self.heads() @@ -1083,7 +1096,7 @@ if common is None: common = [nullrev] - if rustancestor is not None: + if rustancestor is not None and self.index.rust_ext_compat: return rustancestor.MissingAncestors(self.index, common) return ancestor.incrementalmissingancestors(self.parentrevs, common) @@ -1127,7 +1140,7 @@ not supplied, uses all of the revlog's heads. If common is not supplied, uses nullid.""" if common is None: - common = [nullid] + common = [self.nullid] if heads is None: heads = self.heads() @@ -1165,11 +1178,15 @@ return nonodes lowestrev = min([self.rev(n) for n in roots]) else: - roots = [nullid] # Everybody's a descendant of nullid + roots = [self.nullid] # Everybody's a descendant of nullid lowestrev = nullrev if (lowestrev == nullrev) and (heads is None): # We want _all_ the nodes! - return ([self.node(r) for r in self], [nullid], list(self.heads())) + return ( + [self.node(r) for r in self], + [self.nullid], + list(self.heads()), + ) if heads is None: # All nodes are ancestors, so the latest ancestor is the last # node. @@ -1195,7 +1212,7 @@ # grab a node to tag n = nodestotag.pop() # Never tag nullid - if n == nullid: + if n == self.nullid: continue # A node's revision number represents its place in a # topologically sorted list of nodes. @@ -1207,7 +1224,7 @@ ancestors.add(n) # Mark as ancestor # Add non-nullid parents to list of nodes to tag. nodestotag.update( - [p for p in self.parents(n) if p != nullid] + [p for p in self.parents(n) if p != self.nullid] ) elif n in heads: # We've seen it before, is it a fake head? # So it is, real heads should not be the ancestors of @@ -1235,7 +1252,7 @@ # We are descending from nullid, and don't need to care about # any other roots. lowestrev = nullrev - roots = [nullid] + roots = [self.nullid] # Transform our roots list into a set. descendants = set(roots) # Also, keep the original roots so we can filter out roots that aren't @@ -1299,7 +1316,7 @@ return self.index.headrevs() except AttributeError: return self._headrevs() - if rustdagop is not None: + if rustdagop is not None and self.index.rust_ext_compat: return rustdagop.headrevs(self.index, revs) return dagop.headrevs(revs, self._uncheckedparentrevs) @@ -1329,7 +1346,7 @@ """ if start is None and stop is None: if not len(self): - return [nullid] + return [self.nullid] return [self.node(r) for r in self.headrevs()] if start is None: @@ -1419,13 +1436,13 @@ if ancs: # choose a consistent winner when there's a tie return min(map(self.node, ancs)) - return nullid + return self.nullid def _match(self, id): if isinstance(id, int): # rev return self.node(id) - if len(id) == 20: + if len(id) == self.nodeconstants.nodelen: # possibly a binary node # odds of a binary node being all hex in ASCII are 1 in 10**25 try: @@ -1446,7 +1463,7 @@ return self.node(rev) except (ValueError, OverflowError): pass - if len(id) == 40: + if len(id) == 2 * self.nodeconstants.nodelen: try: # a full hex nodeid? node = bin(id) @@ -1457,29 +1474,34 @@ def _partialmatch(self, id): # we don't care wdirfilenodeids as they should be always full hash - maybewdir = wdirhex.startswith(id) + maybewdir = self.nodeconstants.wdirhex.startswith(id) + ambiguous = False try: partial = self.index.partialmatch(id) if partial and self.hasnode(partial): if maybewdir: # single 'ff...' match in radix tree, ambiguous with wdir - raise error.RevlogError - return partial - if maybewdir: + ambiguous = True + else: + return partial + elif maybewdir: # no 'ff...' match in radix tree, wdir identified raise error.WdirUnsupported - return None + else: + return None except error.RevlogError: # parsers.c radix tree lookup gave multiple matches # fast path: for unfiltered changelog, radix tree is accurate if not getattr(self, 'filteredrevs', None): - raise error.AmbiguousPrefixLookupError( - id, self.indexfile, _(b'ambiguous identifier') - ) + ambiguous = True # fall through to slow path that filters hidden revisions except (AttributeError, ValueError): # we are pure python, or key was too short to search radix tree pass + if ambiguous: + raise error.AmbiguousPrefixLookupError( + id, self.display_id, _(b'ambiguous identifier') + ) if id in self._pcache: return self._pcache[id] @@ -1493,14 +1515,14 @@ nl = [ n for n in nl if hex(n).startswith(id) and self.hasnode(n) ] - if nullhex.startswith(id): - nl.append(nullid) + if self.nodeconstants.nullhex.startswith(id): + nl.append(self.nullid) if len(nl) > 0: if len(nl) == 1 and not maybewdir: self._pcache[id] = nl[0] return nl[0] raise error.AmbiguousPrefixLookupError( - id, self.indexfile, _(b'ambiguous identifier') + id, self.display_id, _(b'ambiguous identifier') ) if maybewdir: raise error.WdirUnsupported @@ -1520,7 +1542,7 @@ if n: return n - raise error.LookupError(id, self.indexfile, _(b'no match found')) + raise error.LookupError(id, self.display_id, _(b'no match found')) def shortest(self, node, minlength=1): """Find the shortest unambiguous prefix that matches node.""" @@ -1534,7 +1556,7 @@ # single 'ff...' match return True if matchednode is None: - raise error.LookupError(node, self.indexfile, _(b'no node')) + raise error.LookupError(node, self.display_id, _(b'no node')) return True def maybewdir(prefix): @@ -1554,13 +1576,15 @@ length = max(self.index.shortest(node), minlength) return disambiguate(hexnode, length) except error.RevlogError: - if node != wdirid: - raise error.LookupError(node, self.indexfile, _(b'no node')) + if node != self.nodeconstants.wdirid: + raise error.LookupError( + node, self.display_id, _(b'no node') + ) except AttributeError: # Fall through to pure code pass - if node == wdirid: + if node == self.nodeconstants.wdirid: for length in range(minlength, len(hexnode) + 1): prefix = hexnode[:length] if isvalid(prefix): @@ -1579,102 +1603,6 @@ p1, p2 = self.parents(node) return storageutil.hashrevisionsha1(text, p1, p2) != node - def _cachesegment(self, offset, data): - """Add a segment to the revlog cache. - - Accepts an absolute offset and the data that is at that location. - """ - o, d = self._chunkcache - # try to add to existing cache - if o + len(d) == offset and len(d) + len(data) < _chunksize: - self._chunkcache = o, d + data - else: - self._chunkcache = offset, data - - def _readsegment(self, offset, length, df=None): - """Load a segment of raw data from the revlog. - - Accepts an absolute offset, length to read, and an optional existing - file handle to read from. - - If an existing file handle is passed, it will be seeked and the - original seek position will NOT be restored. - - Returns a str or buffer of raw byte data. - - Raises if the requested number of bytes could not be read. - """ - # Cache data both forward and backward around the requested - # data, in a fixed size window. This helps speed up operations - # involving reading the revlog backwards. - cachesize = self._chunkcachesize - realoffset = offset & ~(cachesize - 1) - reallength = ( - (offset + length + cachesize) & ~(cachesize - 1) - ) - realoffset - with self._datareadfp(df) as df: - df.seek(realoffset) - d = df.read(reallength) - - self._cachesegment(realoffset, d) - if offset != realoffset or reallength != length: - startoffset = offset - realoffset - if len(d) - startoffset < length: - raise error.RevlogError( - _( - b'partial read of revlog %s; expected %d bytes from ' - b'offset %d, got %d' - ) - % ( - self.indexfile if self._inline else self.datafile, - length, - realoffset, - len(d) - startoffset, - ) - ) - - return util.buffer(d, startoffset, length) - - if len(d) < length: - raise error.RevlogError( - _( - b'partial read of revlog %s; expected %d bytes from offset ' - b'%d, got %d' - ) - % ( - self.indexfile if self._inline else self.datafile, - length, - offset, - len(d), - ) - ) - - return d - - def _getsegment(self, offset, length, df=None): - """Obtain a segment of raw data from the revlog. - - Accepts an absolute offset, length of bytes to obtain, and an - optional file handle to the already-opened revlog. If the file - handle is used, it's original seek position will not be preserved. - - Requests for data may be returned from a cache. - - Returns a str or a buffer instance of raw byte data. - """ - o, d = self._chunkcache - l = len(d) - - # is it in the cache? - cachestart = offset - o - cacheend = cachestart + length - if cachestart >= 0 and cacheend <= l: - if cachestart == 0 and cacheend == l: - return d # avoid a copy - return util.buffer(d, cachestart, cacheend - cachestart) - - return self._readsegment(offset, length, df=df) - def _getsegmentforrevs(self, startrev, endrev, df=None): """Obtain a segment of raw data corresponding to a range of revisions. @@ -1707,7 +1635,7 @@ end += (endrev + 1) * self.index.entry_size length = end - start - return start, self._getsegment(start, length, df=df) + return start, self._segmentfile.read_chunk(start, length, df) def _chunk(self, rev, df=None): """Obtain a single decompressed chunk for a revision. @@ -1718,7 +1646,18 @@ Returns a str holding uncompressed data for the requested revision. """ - return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1]) + compression_mode = self.index[rev][10] + data = self._getsegmentforrevs(rev, rev, df=df)[1] + if compression_mode == COMP_MODE_PLAIN: + return data + elif compression_mode == COMP_MODE_DEFAULT: + return self._decompressor(data) + elif compression_mode == COMP_MODE_INLINE: + return self.decompress(data) + else: + msg = b'unknown compression mode %d' + msg %= compression_mode + raise error.RevlogError(msg) def _chunks(self, revs, df=None, targetsize=None): """Obtain decompressed chunks for the specified revisions. @@ -1766,19 +1705,28 @@ return [self._chunk(rev, df=df) for rev in revschunk] decomp = self.decompress + # self._decompressor might be None, but will not be used in that case + def_decomp = self._decompressor for rev in revschunk: chunkstart = start(rev) if inline: chunkstart += (rev + 1) * iosize chunklength = length(rev) - ladd(decomp(buffer(data, chunkstart - offset, chunklength))) + comp_mode = self.index[rev][10] + c = buffer(data, chunkstart - offset, chunklength) + if comp_mode == COMP_MODE_PLAIN: + ladd(c) + elif comp_mode == COMP_MODE_INLINE: + ladd(decomp(c)) + elif comp_mode == COMP_MODE_DEFAULT: + ladd(def_decomp(c)) + else: + msg = b'unknown compression mode %d' + msg %= comp_mode + raise error.RevlogError(msg) return l - def _chunkclear(self): - """Clear the raw chunk cache.""" - self._chunkcache = (0, b'') - def deltaparent(self, rev): """return deltaparent of the given revision""" base = self.index[rev][3] @@ -1854,7 +1802,7 @@ b'use revlog.rawdata(...)' ) util.nouideprecwarn(msg, b'5.2', stacklevel=2) - return self._revisiondata(nodeorrev, _df, raw=raw)[0] + return self._revisiondata(nodeorrev, _df, raw=raw) def sidedata(self, nodeorrev, _df=None): """a map of extra data related to the changeset but not part of the hash @@ -1863,7 +1811,12 @@ mapping object will likely be used in the future for a more efficient/lazy code. """ - return self._revisiondata(nodeorrev, _df)[1] + # deal with argument type + if isinstance(nodeorrev, int): + rev = nodeorrev + else: + rev = self.rev(nodeorrev) + return self._sidedata(rev) def _revisiondata(self, nodeorrev, _df=None, raw=False): # deal with argument type @@ -1875,24 +1828,17 @@ rev = None # fast path the special `nullid` rev - if node == nullid: - return b"", {} + if node == self.nullid: + return b"" # ``rawtext`` is the text as stored inside the revlog. Might be the # revision or might need to be processed to retrieve the revision. rev, rawtext, validated = self._rawtext(node, rev, _df=_df) - if self.version & 0xFFFF == REVLOGV2: - if rev is None: - rev = self.rev(node) - sidedata = self._sidedata(rev) - else: - sidedata = {} - if raw and validated: # if we don't want to process the raw text and that raw # text is cached, we can exit early. - return rawtext, sidedata + return rawtext if rev is None: rev = self.rev(node) # the revlog's flag for this revision @@ -1901,7 +1847,7 @@ if validated and flags == REVIDX_DEFAULT_FLAGS: # no extra flags set, no flag processor runs, text = rawtext - return rawtext, sidedata + return rawtext if raw: validatehash = flagutil.processflagsraw(self, rawtext, flags) @@ -1914,7 +1860,7 @@ if not validated: self._revisioncache = (node, rev, rawtext) - return text, sidedata + return text def _rawtext(self, node, rev, _df=None): """return the possibly unvalidated rawtext for a revision @@ -1970,7 +1916,30 @@ if sidedata_size == 0: return {} - segment = self._getsegment(sidedata_offset, sidedata_size) + if self._docket.sidedata_end < sidedata_offset + sidedata_size: + filename = self._sidedatafile + end = self._docket.sidedata_end + offset = sidedata_offset + length = sidedata_size + m = FILE_TOO_SHORT_MSG % (filename, length, offset, end) + raise error.RevlogError(m) + + comp_segment = self._segmentfile_sidedata.read_chunk( + sidedata_offset, sidedata_size + ) + + comp = self.index[rev][11] + if comp == COMP_MODE_PLAIN: + segment = comp_segment + elif comp == COMP_MODE_DEFAULT: + segment = self._decompressor(comp_segment) + elif comp == COMP_MODE_INLINE: + segment = self.decompress(comp_segment) + else: + msg = b'unknown compression mode %d' + msg %= comp + raise error.RevlogError(msg) + sidedata = sidedatautil.deserialize_sidedata(segment) return sidedata @@ -1979,7 +1948,7 @@ _df - an existing file handle to read from. (internal-only) """ - return self._revisiondata(nodeorrev, _df, raw=True)[0] + return self._revisiondata(nodeorrev, _df, raw=True) def hash(self, text, p1, p2): """Compute a node hash. @@ -2013,14 +1982,14 @@ revornode = templatefilters.short(hex(node)) raise error.RevlogError( _(b"integrity check failed on %s:%s") - % (self.indexfile, pycompat.bytestr(revornode)) + % (self.display_id, pycompat.bytestr(revornode)) ) except error.RevlogError: if self._censorable and storageutil.iscensoredtext(text): - raise error.CensoredNodeError(self.indexfile, node, text) + raise error.CensoredNodeError(self.display_id, node, text) raise - def _enforceinlinesize(self, tr, fp=None): + def _enforceinlinesize(self, tr): """Check if the revlog is too big for inline and convert if so. This should be called after revisions are added to the revlog. If the @@ -2028,51 +1997,172 @@ to use multiple index and data files. """ tiprev = len(self) - 1 - if ( - not self._inline - or (self.start(tiprev) + self.length(tiprev)) < _maxinline - ): + total_size = self.start(tiprev) + self.length(tiprev) + if not self._inline or total_size < _maxinline: return - troffset = tr.findoffset(self.indexfile) + troffset = tr.findoffset(self._indexfile) if troffset is None: raise error.RevlogError( - _(b"%s not found in the transaction") % self.indexfile + _(b"%s not found in the transaction") % self._indexfile ) trindex = 0 - tr.add(self.datafile, 0) - - if fp: + tr.add(self._datafile, 0) + + existing_handles = False + if self._writinghandles is not None: + existing_handles = True + fp = self._writinghandles[0] fp.flush() fp.close() # We can't use the cached file handle after close(). So prevent # its usage. self._writinghandles = None - - with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh: - for r in self: - dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1]) - if troffset <= self.start(r): - trindex = r - - with self._indexfp(b'w') as fp: - self.version &= ~FLAG_INLINE_DATA - self._inline = False - io = self._io - for i in self: - e = io.packentry(self.index[i], self.node, self.version, i) - fp.write(e) - - # the temp file replace the real index when we exit the context - # manager - - tr.replace(self.indexfile, trindex * self.index.entry_size) - nodemaputil.setup_persistent_nodemap(tr, self) - self._chunkclear() + self._segmentfile.writing_handle = None + # No need to deal with sidedata writing handle as it is only + # relevant with revlog-v2 which is never inline, not reaching + # this code + + new_dfh = self._datafp(b'w+') + new_dfh.truncate(0) # drop any potentially existing data + try: + with self._indexfp() as read_ifh: + for r in self: + new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1]) + if troffset <= self.start(r) + r * self.index.entry_size: + trindex = r + new_dfh.flush() + + with self.__index_new_fp() as fp: + self._format_flags &= ~FLAG_INLINE_DATA + self._inline = False + for i in self: + e = self.index.entry_binary(i) + if i == 0 and self._docket is None: + header = self._format_flags | self._format_version + header = self.index.pack_header(header) + e = header + e + fp.write(e) + if self._docket is not None: + self._docket.index_end = fp.tell() + + # There is a small transactional race here. If the rename of + # the index fails, we should remove the datafile. It is more + # important to ensure that the data file is not truncated + # when the index is replaced as otherwise data is lost. + tr.replace(self._datafile, self.start(trindex)) + + # the temp file replace the real index when we exit the context + # manager + + tr.replace(self._indexfile, trindex * self.index.entry_size) + nodemaputil.setup_persistent_nodemap(tr, self) + self._segmentfile = randomaccessfile.randomaccessfile( + self.opener, + self._datafile, + self._chunkcachesize, + ) + + if existing_handles: + # switched from inline to conventional reopen the index + ifh = self.__index_write_fp() + self._writinghandles = (ifh, new_dfh, None) + self._segmentfile.writing_handle = new_dfh + new_dfh = None + # No need to deal with sidedata writing handle as it is only + # relevant with revlog-v2 which is never inline, not reaching + # this code + finally: + if new_dfh is not None: + new_dfh.close() def _nodeduplicatecallback(self, transaction, node): """called when trying to add a node already stored.""" + @contextlib.contextmanager + def reading(self): + """Context manager that keeps data and sidedata files open for reading""" + with self._segmentfile.reading(): + with self._segmentfile_sidedata.reading(): + yield + + @contextlib.contextmanager + def _writing(self, transaction): + if self._trypending: + msg = b'try to write in a `trypending` revlog: %s' + msg %= self.display_id + raise error.ProgrammingError(msg) + if self._writinghandles is not None: + yield + else: + ifh = dfh = sdfh = None + try: + r = len(self) + # opening the data file. + dsize = 0 + if r: + dsize = self.end(r - 1) + dfh = None + if not self._inline: + try: + dfh = self._datafp(b"r+") + if self._docket is None: + dfh.seek(0, os.SEEK_END) + else: + dfh.seek(self._docket.data_end, os.SEEK_SET) + except IOError as inst: + if inst.errno != errno.ENOENT: + raise + dfh = self._datafp(b"w+") + transaction.add(self._datafile, dsize) + if self._sidedatafile is not None: + try: + sdfh = self.opener(self._sidedatafile, mode=b"r+") + dfh.seek(self._docket.sidedata_end, os.SEEK_SET) + except IOError as inst: + if inst.errno != errno.ENOENT: + raise + sdfh = self.opener(self._sidedatafile, mode=b"w+") + transaction.add( + self._sidedatafile, self._docket.sidedata_end + ) + + # opening the index file. + isize = r * self.index.entry_size + ifh = self.__index_write_fp() + if self._inline: + transaction.add(self._indexfile, dsize + isize) + else: + transaction.add(self._indexfile, isize) + # exposing all file handle for writing. + self._writinghandles = (ifh, dfh, sdfh) + self._segmentfile.writing_handle = ifh if self._inline else dfh + self._segmentfile_sidedata.writing_handle = sdfh + yield + if self._docket is not None: + self._write_docket(transaction) + finally: + self._writinghandles = None + self._segmentfile.writing_handle = None + self._segmentfile_sidedata.writing_handle = None + if dfh is not None: + dfh.close() + if sdfh is not None: + sdfh.close() + # closing the index file last to avoid exposing referent to + # potential unflushed data content. + if ifh is not None: + ifh.close() + + def _write_docket(self, transaction): + """write the current docket on disk + + Exist as a method to help changelog to implement transaction logic + + We could also imagine using the same transaction logic for all revlog + since docket are cheap.""" + self._docket.write(transaction) + def addrevision( self, text, @@ -2102,12 +2192,12 @@ """ if link == nullrev: raise error.RevlogError( - _(b"attempted to add linkrev -1 to %s") % self.indexfile + _(b"attempted to add linkrev -1 to %s") % self.display_id ) if sidedata is None: sidedata = {} - elif not self.hassidedata: + elif sidedata and not self.hassidedata: raise error.ProgrammingError( _(b"trying to add sidedata to a revlog who don't support them") ) @@ -2127,7 +2217,7 @@ _( b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB" ) - % (self.indexfile, len(rawtext)) + % (self.display_id, len(rawtext)) ) node = node or self.hash(rawtext, p1, p2) @@ -2168,11 +2258,7 @@ useful when reusing a revision not stored in this revlog (ex: received over wire, or read from an external bundle). """ - dfh = None - if not self._inline: - dfh = self._datafp(b"a+") - ifh = self._indexfp(b"a+") - try: + with self._writing(transaction): return self._addrevision( node, rawtext, @@ -2182,15 +2268,9 @@ p2, flags, cachedelta, - ifh, - dfh, deltacomputer=deltacomputer, sidedata=sidedata, ) - finally: - if dfh: - dfh.close() - ifh.close() def compress(self, data): """Generate a possibly-compressed representation of data.""" @@ -2253,17 +2333,7 @@ elif t == b'u': return util.buffer(data, 1) - try: - compressor = self._decompressors[t] - except KeyError: - try: - engine = util.compengines.forrevlogheader(t) - compressor = engine.revlogcompressor(self._compengineopts) - self._decompressors[t] = compressor - except KeyError: - raise error.RevlogError( - _(b'unknown compression type %s') % binascii.hexlify(t) - ) + compressor = self._get_decompressor(t) return compressor.decompress(data) @@ -2277,8 +2347,6 @@ p2, flags, cachedelta, - ifh, - dfh, alwayscache=False, deltacomputer=None, sidedata=None, @@ -2296,19 +2364,25 @@ - rawtext is optional (can be None); if not set, cachedelta must be set. if both are set, they must correspond to each other. """ - if node == nullid: + if node == self.nullid: raise error.RevlogError( - _(b"%s: attempt to add null revision") % self.indexfile + _(b"%s: attempt to add null revision") % self.display_id ) - if node == wdirid or node in wdirfilenodeids: + if ( + node == self.nodeconstants.wdirid + or node in self.nodeconstants.wdirfilenodeids + ): raise error.RevlogError( - _(b"%s: attempt to add wdir revision") % self.indexfile + _(b"%s: attempt to add wdir revision") % self.display_id ) + if self._writinghandles is None: + msg = b'adding revision outside `revlog._writing` context' + raise error.ProgrammingError(msg) if self._inline: - fh = ifh + fh = self._writinghandles[0] else: - fh = dfh + fh = self._writinghandles[1] btext = [rawtext] @@ -2318,18 +2392,20 @@ offset = self._get_data_offset(prev) if self._concurrencychecker: + ifh, dfh, sdfh = self._writinghandles + # XXX no checking for the sidedata file if self._inline: # offset is "as if" it were in the .d file, so we need to add on # the size of the entry metadata. self._concurrencychecker( - ifh, self.indexfile, offset + curr * self.index.entry_size + ifh, self._indexfile, offset + curr * self.index.entry_size ) else: # Entries in the .i are a consistent size. self._concurrencychecker( - ifh, self.indexfile, curr * self.index.entry_size + ifh, self._indexfile, curr * self.index.entry_size ) - self._concurrencychecker(dfh, self.datafile, offset) + self._concurrencychecker(dfh, self._datafile, offset) p1r, p2r = self.rev(p1), self.rev(p2) @@ -2348,13 +2424,45 @@ if deltacomputer is None: deltacomputer = deltautil.deltacomputer(self) - revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags) + revinfo = revlogutils.revisioninfo( + node, + p1, + p2, + btext, + textlen, + cachedelta, + flags, + ) deltainfo = deltacomputer.finddeltainfo(revinfo, fh) - if sidedata: + compression_mode = COMP_MODE_INLINE + if self._docket is not None: + default_comp = self._docket.default_compression_header + r = deltautil.delta_compression(default_comp, deltainfo) + compression_mode, deltainfo = r + + sidedata_compression_mode = COMP_MODE_INLINE + if sidedata and self.hassidedata: + sidedata_compression_mode = COMP_MODE_PLAIN serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) - sidedata_offset = offset + deltainfo.deltalen + sidedata_offset = self._docket.sidedata_end + h, comp_sidedata = self.compress(serialized_sidedata) + if ( + h != b'u' + and comp_sidedata[0:1] != b'\0' + and len(comp_sidedata) < len(serialized_sidedata) + ): + assert not h + if ( + comp_sidedata[0:1] + == self._docket.default_compression_header + ): + sidedata_compression_mode = COMP_MODE_DEFAULT + serialized_sidedata = comp_sidedata + else: + sidedata_compression_mode = COMP_MODE_INLINE + serialized_sidedata = comp_sidedata else: serialized_sidedata = b"" # Don't store the offset if the sidedata is empty, that way @@ -2362,33 +2470,36 @@ # than ones we manually add. sidedata_offset = 0 - e = ( - offset_type(offset, flags), - deltainfo.deltalen, - textlen, - deltainfo.base, - link, - p1r, - p2r, - node, - sidedata_offset, - len(serialized_sidedata), + e = revlogutils.entry( + flags=flags, + data_offset=offset, + data_compressed_length=deltainfo.deltalen, + data_uncompressed_length=textlen, + data_compression_mode=compression_mode, + data_delta_base=deltainfo.base, + link_rev=link, + parent_rev_1=p1r, + parent_rev_2=p2r, + node_id=node, + sidedata_offset=sidedata_offset, + sidedata_compressed_length=len(serialized_sidedata), + sidedata_compression_mode=sidedata_compression_mode, ) - if self.version & 0xFFFF != REVLOGV2: - e = e[:8] - self.index.append(e) - entry = self._io.packentry(e, self.node, self.version, curr) + entry = self.index.entry_binary(curr) + if curr == 0 and self._docket is None: + header = self._format_flags | self._format_version + header = self.index.pack_header(header) + entry = header + entry self._writeentry( transaction, - ifh, - dfh, entry, deltainfo.data, link, offset, serialized_sidedata, + sidedata_offset, ) rawtext = btext[0] @@ -2410,19 +2521,13 @@ to `n - 1`'s sidedata being written after `n`'s data. TODO cache this in a docket file before getting out of experimental.""" - if self.version & 0xFFFF != REVLOGV2: + if self._docket is None: return self.end(prev) - - offset = 0 - for rev, entry in enumerate(self.index): - sidedata_end = entry[8] + entry[9] - # Sidedata for a previous rev has potentially been written after - # this rev's end, so take the max. - offset = max(self.end(rev), offset, sidedata_end) - return offset + else: + return self._docket.data_end def _writeentry( - self, transaction, ifh, dfh, entry, data, link, offset, sidedata + self, transaction, entry, data, link, offset, sidedata, sidedata_offset ): # Files opened in a+ mode have inconsistent behavior on various # platforms. Windows requires that a file positioning call be made @@ -2436,29 +2541,47 @@ # Note: This is likely not necessary on Python 3. However, because # the file handle is reused for reads and may be seeked there, we need # to be careful before changing this. - ifh.seek(0, os.SEEK_END) + if self._writinghandles is None: + msg = b'adding revision outside `revlog._writing` context' + raise error.ProgrammingError(msg) + ifh, dfh, sdfh = self._writinghandles + if self._docket is None: + ifh.seek(0, os.SEEK_END) + else: + ifh.seek(self._docket.index_end, os.SEEK_SET) if dfh: - dfh.seek(0, os.SEEK_END) + if self._docket is None: + dfh.seek(0, os.SEEK_END) + else: + dfh.seek(self._docket.data_end, os.SEEK_SET) + if sdfh: + sdfh.seek(self._docket.sidedata_end, os.SEEK_SET) curr = len(self) - 1 if not self._inline: - transaction.add(self.datafile, offset) - transaction.add(self.indexfile, curr * len(entry)) + transaction.add(self._datafile, offset) + if self._sidedatafile: + transaction.add(self._sidedatafile, sidedata_offset) + transaction.add(self._indexfile, curr * len(entry)) if data[0]: dfh.write(data[0]) dfh.write(data[1]) if sidedata: - dfh.write(sidedata) + sdfh.write(sidedata) ifh.write(entry) else: offset += curr * self.index.entry_size - transaction.add(self.indexfile, offset) + transaction.add(self._indexfile, offset) ifh.write(entry) ifh.write(data[0]) ifh.write(data[1]) - if sidedata: - ifh.write(sidedata) - self._enforceinlinesize(transaction, ifh) + assert not sidedata + self._enforceinlinesize(transaction) + if self._docket is not None: + self._docket.index_end = self._writinghandles[0].tell() + self._docket.data_end = self._writinghandles[1].tell() + self._docket.sidedata_end = self._writinghandles[2].tell() + nodemaputil.setup_persistent_nodemap(transaction, self) def addgroup( @@ -2481,115 +2604,93 @@ this revlog and the node that was added. """ - if self._writinghandles: + if self._adding_group: raise error.ProgrammingError(b'cannot nest addgroup() calls') - r = len(self) - end = 0 - if r: - end = self.end(r - 1) - ifh = self._indexfp(b"a+") - isize = r * self.index.entry_size - if self._inline: - transaction.add(self.indexfile, end + isize) - dfh = None - else: - transaction.add(self.indexfile, isize) - transaction.add(self.datafile, end) - dfh = self._datafp(b"a+") - - def flush(): - if dfh: - dfh.flush() - ifh.flush() - - self._writinghandles = (ifh, dfh) + self._adding_group = True empty = True - try: - deltacomputer = deltautil.deltacomputer(self) - # loop through our set of deltas - for data in deltas: - node, p1, p2, linknode, deltabase, delta, flags, sidedata = data - link = linkmapper(linknode) - flags = flags or REVIDX_DEFAULT_FLAGS - - rev = self.index.get_rev(node) - if rev is not None: - # this can happen if two branches make the same change - self._nodeduplicatecallback(transaction, rev) - if duplicaterevisioncb: - duplicaterevisioncb(self, rev) - empty = False - continue - - for p in (p1, p2): - if not self.index.has_node(p): + with self._writing(transaction): + deltacomputer = deltautil.deltacomputer(self) + # loop through our set of deltas + for data in deltas: + ( + node, + p1, + p2, + linknode, + deltabase, + delta, + flags, + sidedata, + ) = data + link = linkmapper(linknode) + flags = flags or REVIDX_DEFAULT_FLAGS + + rev = self.index.get_rev(node) + if rev is not None: + # this can happen if two branches make the same change + self._nodeduplicatecallback(transaction, rev) + if duplicaterevisioncb: + duplicaterevisioncb(self, rev) + empty = False + continue + + for p in (p1, p2): + if not self.index.has_node(p): + raise error.LookupError( + p, self.radix, _(b'unknown parent') + ) + + if not self.index.has_node(deltabase): raise error.LookupError( - p, self.indexfile, _(b'unknown parent') + deltabase, self.display_id, _(b'unknown delta base') ) - if not self.index.has_node(deltabase): - raise error.LookupError( - deltabase, self.indexfile, _(b'unknown delta base') + baserev = self.rev(deltabase) + + if baserev != nullrev and self.iscensored(baserev): + # if base is censored, delta must be full replacement in a + # single patch operation + hlen = struct.calcsize(b">lll") + oldlen = self.rawsize(baserev) + newlen = len(delta) - hlen + if delta[:hlen] != mdiff.replacediffheader( + oldlen, newlen + ): + raise error.CensoredBaseError( + self.display_id, self.node(baserev) + ) + + if not flags and self._peek_iscensored(baserev, delta): + flags |= REVIDX_ISCENSORED + + # We assume consumers of addrevisioncb will want to retrieve + # the added revision, which will require a call to + # revision(). revision() will fast path if there is a cache + # hit. So, we tell _addrevision() to always cache in this case. + # We're only using addgroup() in the context of changegroup + # generation so the revision data can always be handled as raw + # by the flagprocessor. + rev = self._addrevision( + node, + None, + transaction, + link, + p1, + p2, + flags, + (baserev, delta), + alwayscache=alwayscache, + deltacomputer=deltacomputer, + sidedata=sidedata, ) - baserev = self.rev(deltabase) - - if baserev != nullrev and self.iscensored(baserev): - # if base is censored, delta must be full replacement in a - # single patch operation - hlen = struct.calcsize(b">lll") - oldlen = self.rawsize(baserev) - newlen = len(delta) - hlen - if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen): - raise error.CensoredBaseError( - self.indexfile, self.node(baserev) - ) - - if not flags and self._peek_iscensored(baserev, delta, flush): - flags |= REVIDX_ISCENSORED - - # We assume consumers of addrevisioncb will want to retrieve - # the added revision, which will require a call to - # revision(). revision() will fast path if there is a cache - # hit. So, we tell _addrevision() to always cache in this case. - # We're only using addgroup() in the context of changegroup - # generation so the revision data can always be handled as raw - # by the flagprocessor. - rev = self._addrevision( - node, - None, - transaction, - link, - p1, - p2, - flags, - (baserev, delta), - ifh, - dfh, - alwayscache=alwayscache, - deltacomputer=deltacomputer, - sidedata=sidedata, - ) - - if addrevisioncb: - addrevisioncb(self, rev) - empty = False - - if not dfh and not self._inline: - # addrevision switched from inline to conventional - # reopen the index - ifh.close() - dfh = self._datafp(b"a+") - ifh = self._indexfp(b"a+") - self._writinghandles = (ifh, dfh) + if addrevisioncb: + addrevisioncb(self, rev) + empty = False finally: - self._writinghandles = None - - if dfh: - dfh.close() - ifh.close() + self._adding_group = False return not empty def iscensored(self, rev): @@ -2599,7 +2700,7 @@ return self.flags(rev) & REVIDX_ISCENSORED - def _peek_iscensored(self, baserev, delta, flush): + def _peek_iscensored(self, baserev, delta): """Quickly check if a delta produces a censored revision.""" if not self._censorable: return False @@ -2642,19 +2743,31 @@ return # first truncate the files on disk - end = self.start(rev) + data_end = self.start(rev) if not self._inline: - transaction.add(self.datafile, end) + transaction.add(self._datafile, data_end) end = rev * self.index.entry_size else: - end += rev * self.index.entry_size - - transaction.add(self.indexfile, end) + end = data_end + (rev * self.index.entry_size) + + if self._sidedatafile: + sidedata_end = self.sidedata_cut_off(rev) + transaction.add(self._sidedatafile, sidedata_end) + + transaction.add(self._indexfile, end) + if self._docket is not None: + # XXX we could, leverage the docket while stripping. However it is + # not powerfull enough at the time of this comment + self._docket.index_end = end + self._docket.data_end = data_end + self._docket.sidedata_end = sidedata_end + self._docket.write(transaction, stripping=True) # then reset internal state in memory to forget those revisions self._revisioncache = None self._chaininfocache = util.lrucachedict(500) - self._chunkclear() + self._segmentfile.clear_cache() + self._segmentfile_sidedata.clear_cache() del self.index[rev:-1] @@ -2682,7 +2795,7 @@ dd = 0 try: - f = self.opener(self.indexfile) + f = self.opener(self._indexfile) f.seek(0, io.SEEK_END) actual = f.tell() f.close() @@ -2703,9 +2816,19 @@ return (dd, di) def files(self): - res = [self.indexfile] - if not self._inline: - res.append(self.datafile) + res = [self._indexfile] + if self._docket_file is None: + if not self._inline: + res.append(self._datafile) + else: + res.append(self._docket_file) + res.extend(self._docket.old_index_filepaths(include_empty=False)) + if self._docket.data_end: + res.append(self._datafile) + res.extend(self._docket.old_data_filepaths(include_empty=False)) + if self._docket.sidedata_end: + res.append(self._sidedatafile) + res.extend(self._docket.old_sidedata_filepaths(include_empty=False)) return res def emitrevisions( @@ -2762,7 +2885,7 @@ addrevisioncb=None, deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None, - sidedatacompanion=None, + sidedata_helpers=None, ): """Copy this revlog to another, possibly with format changes. @@ -2805,21 +2928,8 @@ argument controls whether to force compute deltas against both parents for merges. By default, the current default is used. - If not None, the `sidedatacompanion` is callable that accept two - arguments: - - (srcrevlog, rev) - - and return a quintet that control changes to sidedata content from the - old revision to the new clone result: - - (dropall, filterout, update, new_flags, dropped_flags) - - * if `dropall` is True, all sidedata should be dropped - * `filterout` is a set of sidedata keys that should be dropped - * `update` is a mapping of additionnal/new key -> value - * new_flags is a bitfields of new flags that the revision should get - * dropped_flags is a bitfields of new flags that the revision shoudl not longer have + See `revlogutil.sidedata.get_sidedata_helpers` for the doc on + `sidedata_helpers`. """ if deltareuse not in self.DELTAREUSEALL: raise ValueError( @@ -2859,7 +2969,7 @@ addrevisioncb, deltareuse, forcedeltabothparents, - sidedatacompanion, + sidedata_helpers, ) finally: @@ -2874,7 +2984,7 @@ addrevisioncb, deltareuse, forcedeltabothparents, - sidedatacompanion, + sidedata_helpers, ): """perform the core duty of `revlog.clone` after parameter processing""" deltacomputer = deltautil.deltacomputer(destrevlog) @@ -2890,31 +3000,19 @@ p2 = index[entry[6]][7] node = entry[7] - sidedataactions = (False, [], {}, 0, 0) - if sidedatacompanion is not None: - sidedataactions = sidedatacompanion(self, rev) - # (Possibly) reuse the delta from the revlog if allowed and # the revlog chunk is a delta. cachedelta = None rawtext = None - if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD: - dropall = sidedataactions[0] - filterout = sidedataactions[1] - update = sidedataactions[2] - new_flags = sidedataactions[3] - dropped_flags = sidedataactions[4] - text, sidedata = self._revisiondata(rev) - if dropall: - sidedata = {} - for key in filterout: - sidedata.pop(key, None) - sidedata.update(update) - if not sidedata: - sidedata = None - - flags |= new_flags - flags &= ~dropped_flags + if deltareuse == self.DELTAREUSEFULLADD: + text = self._revisiondata(rev) + sidedata = self.sidedata(rev) + + if sidedata_helpers is not None: + (sidedata, new_flags) = sidedatautil.run_sidedata_helpers( + self, sidedata_helpers, sidedata, rev + ) + flags = flags | new_flags[0] & ~new_flags[1] destrevlog.addrevision( text, @@ -2934,16 +3032,20 @@ if dp != nullrev: cachedelta = (dp, bytes(self._chunk(rev))) + sidedata = None if not cachedelta: - rawtext = self.rawdata(rev) - - ifh = destrevlog.opener( - destrevlog.indexfile, b'a+', checkambig=False - ) - dfh = None - if not destrevlog._inline: - dfh = destrevlog.opener(destrevlog.datafile, b'a+') - try: + rawtext = self._revisiondata(rev) + sidedata = self.sidedata(rev) + if sidedata is None: + sidedata = self.sidedata(rev) + + if sidedata_helpers is not None: + (sidedata, new_flags) = sidedatautil.run_sidedata_helpers( + self, sidedata_helpers, sidedata, rev + ) + flags = flags | new_flags[0] & ~new_flags[1] + + with destrevlog._writing(tr): destrevlog._addrevision( node, rawtext, @@ -2953,100 +3055,23 @@ p2, flags, cachedelta, - ifh, - dfh, deltacomputer=deltacomputer, + sidedata=sidedata, ) - finally: - if dfh: - dfh.close() - ifh.close() if addrevisioncb: addrevisioncb(self, rev, node) def censorrevision(self, tr, censornode, tombstone=b''): - if (self.version & 0xFFFF) == REVLOGV0: + if self._format_version == REVLOGV0: raise error.RevlogError( - _(b'cannot censor with version %d revlogs') % self.version - ) - - censorrev = self.rev(censornode) - tombstone = storageutil.packmeta({b'censored': tombstone}, b'') - - if len(tombstone) > self.rawsize(censorrev): - raise error.Abort( - _(b'censor tombstone must be no longer than censored data') + _(b'cannot censor with version %d revlogs') + % self._format_version ) - - # Rewriting the revlog in place is hard. Our strategy for censoring is - # to create a new revlog, copy all revisions to it, then replace the - # revlogs on transaction close. - - newindexfile = self.indexfile + b'.tmpcensored' - newdatafile = self.datafile + b'.tmpcensored' - - # This is a bit dangerous. We could easily have a mismatch of state. - newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True) - newrl.version = self.version - newrl._generaldelta = self._generaldelta - newrl._io = self._io - - for rev in self.revs(): - node = self.node(rev) - p1, p2 = self.parents(node) - - if rev == censorrev: - newrl.addrawrevision( - tombstone, - tr, - self.linkrev(censorrev), - p1, - p2, - censornode, - REVIDX_ISCENSORED, - ) - - if newrl.deltaparent(rev) != nullrev: - raise error.Abort( - _( - b'censored revision stored as delta; ' - b'cannot censor' - ), - hint=_( - b'censoring of revlogs is not ' - b'fully implemented; please report ' - b'this bug' - ), - ) - continue - - if self.iscensored(rev): - if self.deltaparent(rev) != nullrev: - raise error.Abort( - _( - b'cannot censor due to censored ' - b'revision having delta stored' - ) - ) - rawtext = self._chunk(rev) - else: - rawtext = self.rawdata(rev) - - newrl.addrawrevision( - rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev) - ) - - tr.addbackup(self.indexfile, location=b'store') - if not self._inline: - tr.addbackup(self.datafile, location=b'store') - - self.opener.rename(newrl.indexfile, self.indexfile) - if not self._inline: - self.opener.rename(newrl.datafile, self.datafile) - - self.clearcaches() - self._loadindex() + elif self._format_version == REVLOGV1: + rewrite.v1_censor(self, tr, censornode, tombstone) + else: + rewrite.v2_censor(self, tr, censornode, tombstone) def verifyintegrity(self, state): """Verifies the integrity of the revlog. @@ -3060,13 +3085,13 @@ if di: yield revlogproblem(error=_(b'index contains %d extra bytes') % di) - version = self.version & 0xFFFF + version = self._format_version # The verifier tells us what version revlog we should be. if version != state[b'expectedversion']: yield revlogproblem( warning=_(b"warning: '%s' uses revlog format %d; expected %d") - % (self.indexfile, version, state[b'expectedversion']) + % (self.display_id, version, state[b'expectedversion']) ) state[b'skipread'] = set() @@ -3164,9 +3189,9 @@ d = {} if exclusivefiles: - d[b'exclusivefiles'] = [(self.opener, self.indexfile)] + d[b'exclusivefiles'] = [(self.opener, self._indexfile)] if not self._inline: - d[b'exclusivefiles'].append((self.opener, self.datafile)) + d[b'exclusivefiles'].append((self.opener, self._datafile)) if sharedfiles: d[b'sharedfiles'] = [] @@ -3184,12 +3209,10 @@ return d - def rewrite_sidedata(self, helpers, startrev, endrev): - if self.version & 0xFFFF != REVLOGV2: + def rewrite_sidedata(self, transaction, helpers, startrev, endrev): + if not self.hassidedata: return - # inline are not yet supported because they suffer from an issue when - # rewriting them (since it's not an append-only operation). - # See issue6485. + # revlog formats with sidedata support does not support inline assert not self._inline if not helpers[1] and not helpers[2]: # Nothing to generate or remove @@ -3197,13 +3220,14 @@ new_entries = [] # append the new sidedata - with self._datafp(b'a+') as fp: - # Maybe this bug still exists, see revlog._writeentry - fp.seek(0, os.SEEK_END) - current_offset = fp.tell() + with self._writing(transaction): + ifh, dfh, sdfh = self._writinghandles + dfh.seek(self._docket.sidedata_end, os.SEEK_SET) + + current_offset = sdfh.tell() for rev in range(startrev, endrev + 1): entry = self.index[rev] - new_sidedata = storageutil.run_sidedata_helpers( + new_sidedata, flags = sidedatautil.run_sidedata_helpers( store=self, sidedata_helpers=helpers, sidedata={}, @@ -3213,24 +3237,58 @@ serialized_sidedata = sidedatautil.serialize_sidedata( new_sidedata ) + + sidedata_compression_mode = COMP_MODE_INLINE + if serialized_sidedata and self.hassidedata: + sidedata_compression_mode = COMP_MODE_PLAIN + h, comp_sidedata = self.compress(serialized_sidedata) + if ( + h != b'u' + and comp_sidedata[0] != b'\0' + and len(comp_sidedata) < len(serialized_sidedata) + ): + assert not h + if ( + comp_sidedata[0] + == self._docket.default_compression_header + ): + sidedata_compression_mode = COMP_MODE_DEFAULT + serialized_sidedata = comp_sidedata + else: + sidedata_compression_mode = COMP_MODE_INLINE + serialized_sidedata = comp_sidedata if entry[8] != 0 or entry[9] != 0: # rewriting entries that already have sidedata is not # supported yet, because it introduces garbage data in the # revlog. - msg = b"Rewriting existing sidedata is not supported yet" + msg = b"rewriting existing sidedata is not supported yet" raise error.Abort(msg) - entry = entry[:8] - entry += (current_offset, len(serialized_sidedata)) - - fp.write(serialized_sidedata) - new_entries.append(entry) + + # Apply (potential) flags to add and to remove after running + # the sidedata helpers + new_offset_flags = entry[0] | flags[0] & ~flags[1] + entry_update = ( + current_offset, + len(serialized_sidedata), + new_offset_flags, + sidedata_compression_mode, + ) + + # the sidedata computation might have move the file cursors around + sdfh.seek(current_offset, os.SEEK_SET) + sdfh.write(serialized_sidedata) + new_entries.append(entry_update) current_offset += len(serialized_sidedata) - - # rewrite the new index entries - with self._indexfp(b'w+') as fp: - fp.seek(startrev * self.index.entry_size) - for i, entry in enumerate(new_entries): + self._docket.sidedata_end = sdfh.tell() + + # rewrite the new index entries + ifh.seek(startrev * self.index.entry_size) + for i, e in enumerate(new_entries): rev = startrev + i - self.index.replace_sidedata_info(rev, entry[8], entry[9]) - packed = self._io.packentry(entry, self.node, self.version, rev) - fp.write(packed) + self.index.replace_sidedata_info(rev, *e) + packed = self.index.entry_binary(rev) + if rev == 0 and self._docket is None: + header = self._format_flags | self._format_version + header = self.index.pack_header(header) + packed = header + packed + ifh.write(packed)