# Reconstructed from a flattened changeset diff:
#   diff -r 9aeb9e2d28a7 -r 3a333a582d7b hgext/remotefilelog/contentstore.py
#   (new file, Thu Sep 27 13:03:19 2018 -0400)
from __future__ import absolute_import

import threading

from mercurial.node import hex, nullid
from mercurial import (
    mdiff,
    pycompat,
    revlog,
)
from . import (
    basestore,
    constants,
    shallowutil,
)

class ChainIndicies(object):
    """A static class for easy reference to the delta chain indices.
    """
    # The filename of this revision delta
    NAME = 0
    # The mercurial file node for this revision delta
    NODE = 1
    # The filename of the delta base's revision. This is useful when delta
    # between different files (like in the case of a move or copy, we can delta
    # against the original file content).
    BASENAME = 2
    # The mercurial file node for the delta base revision. This is the nullid if
    # this delta is a full text.
    BASENODE = 3
    # The actual delta or full text data.
    DATA = 4

class unioncontentstore(basestore.baseunionstore):
    """Content store that answers queries by consulting several stores in
    order and returning the first successful result.
    """
    def __init__(self, *args, **kwargs):
        """Positional arguments are the underlying stores, in lookup order.

        Recognized keyword arguments:

        writestore: store that addremotefilelognode() forwards to.
        allowincomplete: see the comment on the attribute below.
        """
        super(unioncontentstore, self).__init__(*args, **kwargs)

        self.stores = args
        self.writestore = kwargs.get('writestore')

        # If allowincomplete==True then the union store can return partial
        # delta chains, otherwise it will throw a KeyError if a full
        # deltachain can't be found.
        self.allowincomplete = kwargs.get('allowincomplete', False)

    def get(self, name, node):
        """Fetches the full text revision contents of the given name+node pair.
        If the full text doesn't exist, throws a KeyError.

        Under the hood, this uses getdeltachain() across all the stores to build
        up a full chain to produce the full text.
        """
        chain = self.getdeltachain(name, node)

        if chain[-1][ChainIndicies.BASENODE] != nullid:
            # If we didn't receive a full chain, throw
            raise KeyError((name, hex(node)))

        # The last entry in the chain is a full text, so we start our delta
        # applies with that.
        fulltext = chain.pop()[ChainIndicies.DATA]

        # Walk back from the full text towards the requested revision,
        # applying one delta at a time.
        text = fulltext
        while chain:
            delta = chain.pop()[ChainIndicies.DATA]
            text = mdiff.patches(text, [delta])

        return text

    @basestore.baseunionstore.retriable
    def getdelta(self, name, node):
        """Return the single delta entry for the given name/node pair.

        The first store that does not raise KeyError wins; if every store
        raises, a KeyError for (name, hex(node)) is raised.
        """
        for store in self.stores:
            try:
                return store.getdelta(name, node)
            except KeyError:
                pass

        raise KeyError((name, hex(node)))

    def getdeltachain(self, name, node):
        """Returns the deltachain for the given name/node pair.

        Returns an ordered list of:

          [(name, node, deltabasename, deltabasenode, deltacontent),...]

        where the chain is terminated by a full text entry with a nullid
        deltabasenode.

        If the chain cannot be completed, a KeyError is raised unless
        allowincomplete is set, in which case the partial chain is returned.
        """
        chain = self._getpartialchain(name, node)
        while chain[-1][ChainIndicies.BASENODE] != nullid:
            tail = chain[-1]
            deltabasename = tail[ChainIndicies.BASENAME]
            deltabasenode = tail[ChainIndicies.BASENODE]
            try:
                morechain = self._getpartialchain(deltabasename, deltabasenode)
            except KeyError:
                # If we allow incomplete chains, don't throw.
                if not self.allowincomplete:
                    raise
                break
            chain.extend(morechain)

        return chain

    @basestore.baseunionstore.retriable
    def getmeta(self, name, node):
        """Returns the metadata dict for given node."""
        for store in self.stores:
            try:
                return store.getmeta(name, node)
            except KeyError:
                pass
        raise KeyError((name, hex(node)))

    def getmetrics(self):
        """Return the metrics of all underlying stores, combined with
        shallowutil.sumdicts().
        """
        metrics = [s.getmetrics() for s in self.stores]
        return shallowutil.sumdicts(*metrics)

    @basestore.baseunionstore.retriable
    def _getpartialchain(self, name, node):
        """Returns a partial delta chain for the given name/node pair.

        A partial chain is a chain that may not be terminated in a full-text.
        """
        for store in self.stores:
            try:
                return store.getdeltachain(name, node)
            except KeyError:
                pass

        raise KeyError((name, hex(node)))

    def add(self, name, node, data):
        """Unsupported; always raises RuntimeError."""
        raise RuntimeError("cannot add content only to remotefilelog "
                           "contentstore")

    def getmissing(self, keys):
        """Return the keys that none of the underlying stores has.

        Each store is only asked about the keys still reported missing by
        the stores before it.
        """
        missing = keys
        for store in self.stores:
            if missing:
                missing = store.getmissing(missing)
        return missing

    def addremotefilelognode(self, name, node, data):
        """Forward the write to the configured writestore.

        Raises RuntimeError if no writestore was configured.
        """
        if self.writestore:
            self.writestore.addremotefilelognode(name, node, data)
        else:
            raise RuntimeError("no writable store configured")

    def markledger(self, ledger, options=None):
        """Ask every underlying store to mark its entries in the ledger."""
        for store in self.stores:
            store.markledger(ledger, options)

class remotefilelogcontentstore(basestore.basestore):
    """Content store over remotefilelog blobs, which only hold full texts."""
    def __init__(self, *args, **kwargs):
        super(remotefilelogcontentstore, self).__init__(*args, **kwargs)
        # Holds a per-thread, single-entry (node, meta) cache; see
        # _sanitizemetacache() and _updatemetacache().
        self._threaddata = threading.local()

    def get(self, name, node):
        """Return the raw revision text for the given name/node pair.

        As a side effect the size/flags metadata of the revision is stored
        in the per-thread meta cache.
        """
        # return raw revision text
        data = self._getdata(name, node)

        offset, size, flags = shallowutil.parsesizeflags(data)
        content = data[offset:offset + size]

        ancestormap = shallowutil.ancestormap(data)
        p1, p2, linknode, copyfrom = ancestormap[node]
        copyrev = None
        if copyfrom:
            copyrev = hex(p1)

        self._updatemetacache(node, size, flags)

        # lfs tracks renames in its own metadata, remove hg copy metadata,
        # because copy metadata will be re-added by lfs flag processor.
        if flags & revlog.REVIDX_EXTSTORED:
            copyrev = copyfrom = None
        revision = shallowutil.createrevlogtext(content, copyfrom, copyrev)
        return revision

    def getdelta(self, name, node):
        """Return (revision, name, nullid, meta) for the name/node pair."""
        # Since remotefilelog content stores only contain full texts, just
        # return that.
        revision = self.get(name, node)
        return revision, name, nullid, self.getmeta(name, node)

    def getdeltachain(self, name, node):
        """Return a one-entry delta chain holding the full text."""
        # Since remotefilelog content stores just contain full texts, we return
        # a fake delta chain that just consists of a single full text revision.
        # The nullid in the deltabasenode slot indicates that the revision is a
        # fulltext.
        revision = self.get(name, node)
        return [(name, node, None, nullid, revision)]

    def getmeta(self, name, node):
        """Return the metadata dict (flags and size) for the given node.

        The per-thread cache is reused when it already holds this node;
        otherwise the blob is read and parsed again. Note that the cache is
        keyed on the node only, not on the name.
        """
        self._sanitizemetacache()
        if node != self._threaddata.metacache[0]:
            data = self._getdata(name, node)
            offset, size, flags = shallowutil.parsesizeflags(data)
            self._updatemetacache(node, size, flags)
        return self._threaddata.metacache[1]

    def add(self, name, node, data):
        """Unsupported; always raises RuntimeError."""
        raise RuntimeError("cannot add content only to remotefilelog "
                           "contentstore")

    def _sanitizemetacache(self):
        """Make sure the current thread has a metacache attribute."""
        metacache = getattr(self._threaddata, 'metacache', None)
        if metacache is None:
            self._threaddata.metacache = (None, None) # (node, meta)

    def _updatemetacache(self, node, size, flags):
        """Replace the cached entry unless it already describes node."""
        self._sanitizemetacache()
        if node == self._threaddata.metacache[0]:
            return
        meta = {constants.METAKEYFLAG: flags,
                constants.METAKEYSIZE: size}
        self._threaddata.metacache = (node, meta)

class remotecontentstore(object):
    """Content store that prefetches through the file service and then reads
    the result from the shared store.
    """
    def __init__(self, ui, fileservice, shared):
        self._fileservice = fileservice
        # type(shared) is usually remotefilelogcontentstore
        self._shared = shared

    def get(self, name, node):
        """Prefetch the revision, then return it from the shared store."""
        self._fileservice.prefetch([(name, hex(node))], force=True,
                                   fetchdata=True)
        return self._shared.get(name, node)

    def getdelta(self, name, node):
        """Return (revision, name, nullid, meta) for the name/node pair."""
        revision = self.get(name, node)
        return revision, name, nullid, self._shared.getmeta(name, node)

    def getdeltachain(self, name, node):
        """Return a one-entry delta chain holding the full text."""
        # Since our remote content stores just contain full texts, we return a
        # fake delta chain that just consists of a single full text revision.
        # The nullid in the deltabasenode slot indicates that the revision is a
        # fulltext.
        revision = self.get(name, node)
        return [(name, node, None, nullid, revision)]

    def getmeta(self, name, node):
        """Prefetch the revision, then return the shared store's metadata."""
        self._fileservice.prefetch([(name, hex(node))], force=True,
                                   fetchdata=True)
        return self._shared.getmeta(name, node)

    def add(self, name, node, data):
        """Unsupported; always raises RuntimeError."""
        raise RuntimeError("cannot add to a remote store")

    def getmissing(self, keys):
        """Report every key as missing (returns keys unchanged)."""
        return keys

    def markledger(self, ledger, options=None):
        """No-op: this store marks nothing in the ledger."""
        pass

class manifestrevlogstore(object):
    """Content/history store backed by the manifest revlogs in repo.svfs."""
    def __init__(self, repo):
        self._store = repo.store
        self._svfs = repo.svfs
        # Cache of opened revlogs, keyed by tree name; filled by _revlog().
        self._revlogs = {}
        self._cl = revlog.revlog(self._svfs, '00changelog.i')
        # Linkrev window honoured by markledger(). An end of None means "no
        # upper bound", so markledger() also works when
        # setrepacklinkrevrange() has not been called.
        self._repackstartlinkrev = 0
        self._repackendlinkrev = None

    def get(self, name, node):
        """Return the raw revision text of node from the revlog for name."""
        return self._revlog(name).revision(node, raw=True)

    def getdelta(self, name, node):
        """Return (revision, name, nullid, meta) for the name/node pair."""
        revision = self.get(name, node)
        return revision, name, nullid, self.getmeta(name, node)

    def getdeltachain(self, name, node):
        """Return a one-entry delta chain holding the full text."""
        revision = self.get(name, node)
        return [(name, node, None, nullid, revision)]

    def getmeta(self, name, node):
        """Return the metadata dict (revlog flags and raw size) for node."""
        rl = self._revlog(name)
        rev = rl.rev(node)
        return {constants.METAKEYFLAG: rl.flags(rev),
                constants.METAKEYSIZE: rl.rawsize(rev)}

    def getancestors(self, name, node, known=None):
        """Return a dict mapping ancestor nodes of node (inclusive) to
        (p1, p2, linknode, '') tuples.

        The walk stops as soon as every parent seen so far is either already
        visited or listed in known. If node itself is in known, an empty
        list is returned.
        """
        if known is None:
            known = set()
        if node in known:
            return []

        rl = self._revlog(name)
        ancestors = {}
        missing = set((node,))
        for ancrev in rl.ancestors([rl.rev(node)], inclusive=True):
            ancnode = rl.node(ancrev)
            missing.discard(ancnode)

            p1, p2 = rl.parents(ancnode)
            if p1 != nullid and p1 not in known:
                missing.add(p1)
            if p2 != nullid and p2 not in known:
                missing.add(p2)

            linknode = self._cl.node(rl.linkrev(ancrev))
            ancestors[ancnode] = (p1, p2, linknode, '')
            if not missing:
                break
        return ancestors

    def getnodeinfo(self, name, node):
        """Return (p1, p2, linknode, None) for the given name/node pair."""
        cl = self._cl
        rl = self._revlog(name)
        parents = rl.parents(node)
        linkrev = rl.linkrev(rl.rev(node))
        return (parents[0], parents[1], cl.node(linkrev), None)

    def add(self, *args):
        """Unsupported; always raises RuntimeError."""
        raise RuntimeError("cannot add to a revlog store")

    def _revlog(self, name):
        """Return the (cached) revlog for tree name.

        The empty name maps to '00manifesttree.i'; any other name maps to
        'meta/<name>/00manifest.i'.
        """
        rl = self._revlogs.get(name)
        if rl is None:
            revlogname = '00manifesttree.i'
            if name != '':
                revlogname = 'meta/%s/00manifest.i' % name
            rl = revlog.revlog(self._svfs, revlogname)
            self._revlogs[name] = rl
        return rl

    def getmissing(self, keys):
        """Return the (name, node) keys whose node is not in the revlog for
        name.
        """
        missing = []
        for name, node in keys:
            mfrevlog = self._revlog(name)
            if node not in mfrevlog.nodemap:
                missing.append((name, node))

        return missing

    def setrepacklinkrevrange(self, startrev, endrev):
        """Set the inclusive linkrev window used by markledger()."""
        self._repackstartlinkrev = startrev
        self._repackendlinkrev = endrev

    def markledger(self, ledger, options=None):
        """Mark the entries of the root and per-directory manifest revlogs
        whose linkrev lies in the configured window as both data and history
        entries in ledger.

        Does nothing when options has constants.OPTION_PACKSONLY set.
        """
        if options and options.get(constants.OPTION_PACKSONLY):
            return

        # Root manifest first; it is stored under the empty tree name.
        rootrl = revlog.revlog(self._svfs, '00manifesttree.i')
        self._markrevlog(ledger, '', rootrl)

        for path, encoded, size in self._store.datafiles():
            if path[:5] != 'meta/' or path[-2:] != '.i':
                continue

            # Strip the 'meta/' prefix and '/00manifest.i' suffix.
            treename = path[5:-len('/00manifest.i')]
            self._markrevlog(ledger, treename, revlog.revlog(self._svfs, path))

    def _markrevlog(self, ledger, treename, rl):
        """Mark the revisions of rl inside the linkrev window in ledger."""
        startlinkrev = self._repackstartlinkrev
        endlinkrev = self._repackendlinkrev
        # Walk from the newest revision backwards and stop at the first one
        # whose linkrev is below the window.
        for rev in pycompat.xrange(len(rl) - 1, -1, -1):
            linkrev = rl.linkrev(rev)
            if linkrev < startlinkrev:
                break
            if endlinkrev is not None and linkrev > endlinkrev:
                continue
            node = rl.node(rev)
            ledger.markdataentry(self, treename, node)
            ledger.markhistoryentry(self, treename, node)

    def cleanup(self, ledger):
        """No-op: nothing is cleaned up for revlog-backed stores."""
        pass