hgext/remotefilelog/contentstore.py
changeset 40495 3a333a582d7b
child 40610 13d4ad8d7801
equal deleted inserted replaced
40494:9aeb9e2d28a7 40495:3a333a582d7b
       
     1 from __future__ import absolute_import
       
     2 
       
     3 import threading
       
     4 
       
     5 from mercurial.node import hex, nullid
       
     6 from mercurial import (
       
     7     mdiff,
       
     8     pycompat,
       
     9     revlog,
       
    10 )
       
    11 from . import (
       
    12     basestore,
       
    13     constants,
       
    14     shallowutil,
       
    15 )
       
    16 
       
    17 class ChainIndicies(object):
       
    18     """A static class for easy reference to the delta chain indicies.
       
    19     """
       
    20     # The filename of this revision delta
       
    21     NAME = 0
       
    22     # The mercurial file node for this revision delta
       
    23     NODE = 1
       
    24     # The filename of the delta base's revision. This is useful when delta
       
    25     # between different files (like in the case of a move or copy, we can delta
       
    26     # against the original file content).
       
    27     BASENAME = 2
       
    28     # The mercurial file node for the delta base revision. This is the nullid if
       
    29     # this delta is a full text.
       
    30     BASENODE = 3
       
    31     # The actual delta or full text data.
       
    32     DATA = 4
       
    33 
       
    34 class unioncontentstore(basestore.baseunionstore):
       
    35     def __init__(self, *args, **kwargs):
       
    36         super(unioncontentstore, self).__init__(*args, **kwargs)
       
    37 
       
    38         self.stores = args
       
    39         self.writestore = kwargs.get('writestore')
       
    40 
       
    41         # If allowincomplete==True then the union store can return partial
       
    42         # delta chains, otherwise it will throw a KeyError if a full
       
    43         # deltachain can't be found.
       
    44         self.allowincomplete = kwargs.get('allowincomplete', False)
       
    45 
       
    46     def get(self, name, node):
       
    47         """Fetches the full text revision contents of the given name+node pair.
       
    48         If the full text doesn't exist, throws a KeyError.
       
    49 
       
    50         Under the hood, this uses getdeltachain() across all the stores to build
       
    51         up a full chain to produce the full text.
       
    52         """
       
    53         chain = self.getdeltachain(name, node)
       
    54 
       
    55         if chain[-1][ChainIndicies.BASENODE] != nullid:
       
    56             # If we didn't receive a full chain, throw
       
    57             raise KeyError((name, hex(node)))
       
    58 
       
    59         # The last entry in the chain is a full text, so we start our delta
       
    60         # applies with that.
       
    61         fulltext = chain.pop()[ChainIndicies.DATA]
       
    62 
       
    63         text = fulltext
       
    64         while chain:
       
    65             delta = chain.pop()[ChainIndicies.DATA]
       
    66             text = mdiff.patches(text, [delta])
       
    67 
       
    68         return text
       
    69 
       
    70     @basestore.baseunionstore.retriable
       
    71     def getdelta(self, name, node):
       
    72         """Return the single delta entry for the given name/node pair.
       
    73         """
       
    74         for store in self.stores:
       
    75             try:
       
    76                 return store.getdelta(name, node)
       
    77             except KeyError:
       
    78                 pass
       
    79 
       
    80         raise KeyError((name, hex(node)))
       
    81 
       
    82     def getdeltachain(self, name, node):
       
    83         """Returns the deltachain for the given name/node pair.
       
    84 
       
    85         Returns an ordered list of:
       
    86 
       
    87           [(name, node, deltabasename, deltabasenode, deltacontent),...]
       
    88 
       
    89         where the chain is terminated by a full text entry with a nullid
       
    90         deltabasenode.
       
    91         """
       
    92         chain = self._getpartialchain(name, node)
       
    93         while chain[-1][ChainIndicies.BASENODE] != nullid:
       
    94             x, x, deltabasename, deltabasenode, x = chain[-1]
       
    95             try:
       
    96                 morechain = self._getpartialchain(deltabasename, deltabasenode)
       
    97                 chain.extend(morechain)
       
    98             except KeyError:
       
    99                 # If we allow incomplete chains, don't throw.
       
   100                 if not self.allowincomplete:
       
   101                     raise
       
   102                 break
       
   103 
       
   104         return chain
       
   105 
       
   106     @basestore.baseunionstore.retriable
       
   107     def getmeta(self, name, node):
       
   108         """Returns the metadata dict for given node."""
       
   109         for store in self.stores:
       
   110             try:
       
   111                 return store.getmeta(name, node)
       
   112             except KeyError:
       
   113                 pass
       
   114         raise KeyError((name, hex(node)))
       
   115 
       
   116     def getmetrics(self):
       
   117         metrics = [s.getmetrics() for s in self.stores]
       
   118         return shallowutil.sumdicts(*metrics)
       
   119 
       
   120     @basestore.baseunionstore.retriable
       
   121     def _getpartialchain(self, name, node):
       
   122         """Returns a partial delta chain for the given name/node pair.
       
   123 
       
   124         A partial chain is a chain that may not be terminated in a full-text.
       
   125         """
       
   126         for store in self.stores:
       
   127             try:
       
   128                 return store.getdeltachain(name, node)
       
   129             except KeyError:
       
   130                 pass
       
   131 
       
   132         raise KeyError((name, hex(node)))
       
   133 
       
   134     def add(self, name, node, data):
       
   135         raise RuntimeError("cannot add content only to remotefilelog "
       
   136                            "contentstore")
       
   137 
       
   138     def getmissing(self, keys):
       
   139         missing = keys
       
   140         for store in self.stores:
       
   141             if missing:
       
   142                 missing = store.getmissing(missing)
       
   143         return missing
       
   144 
       
   145     def addremotefilelognode(self, name, node, data):
       
   146         if self.writestore:
       
   147             self.writestore.addremotefilelognode(name, node, data)
       
   148         else:
       
   149             raise RuntimeError("no writable store configured")
       
   150 
       
   151     def markledger(self, ledger, options=None):
       
   152         for store in self.stores:
       
   153             store.markledger(ledger, options)
       
   154 
       
   155 class remotefilelogcontentstore(basestore.basestore):
       
   156     def __init__(self, *args, **kwargs):
       
   157         super(remotefilelogcontentstore, self).__init__(*args, **kwargs)
       
   158         self._threaddata = threading.local()
       
   159 
       
   160     def get(self, name, node):
       
   161         # return raw revision text
       
   162         data = self._getdata(name, node)
       
   163 
       
   164         offset, size, flags = shallowutil.parsesizeflags(data)
       
   165         content = data[offset:offset + size]
       
   166 
       
   167         ancestormap = shallowutil.ancestormap(data)
       
   168         p1, p2, linknode, copyfrom = ancestormap[node]
       
   169         copyrev = None
       
   170         if copyfrom:
       
   171             copyrev = hex(p1)
       
   172 
       
   173         self._updatemetacache(node, size, flags)
       
   174 
       
   175         # lfs tracks renames in its own metadata, remove hg copy metadata,
       
   176         # because copy metadata will be re-added by lfs flag processor.
       
   177         if flags & revlog.REVIDX_EXTSTORED:
       
   178             copyrev = copyfrom = None
       
   179         revision = shallowutil.createrevlogtext(content, copyfrom, copyrev)
       
   180         return revision
       
   181 
       
   182     def getdelta(self, name, node):
       
   183         # Since remotefilelog content stores only contain full texts, just
       
   184         # return that.
       
   185         revision = self.get(name, node)
       
   186         return revision, name, nullid, self.getmeta(name, node)
       
   187 
       
   188     def getdeltachain(self, name, node):
       
   189         # Since remotefilelog content stores just contain full texts, we return
       
   190         # a fake delta chain that just consists of a single full text revision.
       
   191         # The nullid in the deltabasenode slot indicates that the revision is a
       
   192         # fulltext.
       
   193         revision = self.get(name, node)
       
   194         return [(name, node, None, nullid, revision)]
       
   195 
       
   196     def getmeta(self, name, node):
       
   197         self._sanitizemetacache()
       
   198         if node != self._threaddata.metacache[0]:
       
   199             data = self._getdata(name, node)
       
   200             offset, size, flags = shallowutil.parsesizeflags(data)
       
   201             self._updatemetacache(node, size, flags)
       
   202         return self._threaddata.metacache[1]
       
   203 
       
   204     def add(self, name, node, data):
       
   205         raise RuntimeError("cannot add content only to remotefilelog "
       
   206                            "contentstore")
       
   207 
       
   208     def _sanitizemetacache(self):
       
   209         metacache = getattr(self._threaddata, 'metacache', None)
       
   210         if metacache is None:
       
   211             self._threaddata.metacache = (None, None) # (node, meta)
       
   212 
       
   213     def _updatemetacache(self, node, size, flags):
       
   214         self._sanitizemetacache()
       
   215         if node == self._threaddata.metacache[0]:
       
   216             return
       
   217         meta = {constants.METAKEYFLAG: flags,
       
   218                 constants.METAKEYSIZE: size}
       
   219         self._threaddata.metacache = (node, meta)
       
   220 
       
   221 class remotecontentstore(object):
       
   222     def __init__(self, ui, fileservice, shared):
       
   223         self._fileservice = fileservice
       
   224         # type(shared) is usually remotefilelogcontentstore
       
   225         self._shared = shared
       
   226 
       
   227     def get(self, name, node):
       
   228         self._fileservice.prefetch([(name, hex(node))], force=True,
       
   229                                    fetchdata=True)
       
   230         return self._shared.get(name, node)
       
   231 
       
   232     def getdelta(self, name, node):
       
   233         revision = self.get(name, node)
       
   234         return revision, name, nullid, self._shared.getmeta(name, node)
       
   235 
       
   236     def getdeltachain(self, name, node):
       
   237         # Since our remote content stores just contain full texts, we return a
       
   238         # fake delta chain that just consists of a single full text revision.
       
   239         # The nullid in the deltabasenode slot indicates that the revision is a
       
   240         # fulltext.
       
   241         revision = self.get(name, node)
       
   242         return [(name, node, None, nullid, revision)]
       
   243 
       
   244     def getmeta(self, name, node):
       
   245         self._fileservice.prefetch([(name, hex(node))], force=True,
       
   246                                    fetchdata=True)
       
   247         return self._shared.getmeta(name, node)
       
   248 
       
   249     def add(self, name, node, data):
       
   250         raise RuntimeError("cannot add to a remote store")
       
   251 
       
   252     def getmissing(self, keys):
       
   253         return keys
       
   254 
       
   255     def markledger(self, ledger, options=None):
       
   256         pass
       
   257 
       
   258 class manifestrevlogstore(object):
       
   259     def __init__(self, repo):
       
   260         self._store = repo.store
       
   261         self._svfs = repo.svfs
       
   262         self._revlogs = dict()
       
   263         self._cl = revlog.revlog(self._svfs, '00changelog.i')
       
   264         self._repackstartlinkrev = 0
       
   265 
       
   266     def get(self, name, node):
       
   267         return self._revlog(name).revision(node, raw=True)
       
   268 
       
   269     def getdelta(self, name, node):
       
   270         revision = self.get(name, node)
       
   271         return revision, name, nullid, self.getmeta(name, node)
       
   272 
       
   273     def getdeltachain(self, name, node):
       
   274         revision = self.get(name, node)
       
   275         return [(name, node, None, nullid, revision)]
       
   276 
       
   277     def getmeta(self, name, node):
       
   278         rl = self._revlog(name)
       
   279         rev = rl.rev(node)
       
   280         return {constants.METAKEYFLAG: rl.flags(rev),
       
   281                 constants.METAKEYSIZE: rl.rawsize(rev)}
       
   282 
       
   283     def getancestors(self, name, node, known=None):
       
   284         if known is None:
       
   285             known = set()
       
   286         if node in known:
       
   287             return []
       
   288 
       
   289         rl = self._revlog(name)
       
   290         ancestors = {}
       
   291         missing = set((node,))
       
   292         for ancrev in rl.ancestors([rl.rev(node)], inclusive=True):
       
   293             ancnode = rl.node(ancrev)
       
   294             missing.discard(ancnode)
       
   295 
       
   296             p1, p2 = rl.parents(ancnode)
       
   297             if p1 != nullid and p1 not in known:
       
   298                 missing.add(p1)
       
   299             if p2 != nullid and p2 not in known:
       
   300                 missing.add(p2)
       
   301 
       
   302             linknode = self._cl.node(rl.linkrev(ancrev))
       
   303             ancestors[rl.node(ancrev)] = (p1, p2, linknode, '')
       
   304             if not missing:
       
   305                 break
       
   306         return ancestors
       
   307 
       
   308     def getnodeinfo(self, name, node):
       
   309         cl = self._cl
       
   310         rl = self._revlog(name)
       
   311         parents = rl.parents(node)
       
   312         linkrev = rl.linkrev(rl.rev(node))
       
   313         return (parents[0], parents[1], cl.node(linkrev), None)
       
   314 
       
   315     def add(self, *args):
       
   316         raise RuntimeError("cannot add to a revlog store")
       
   317 
       
   318     def _revlog(self, name):
       
   319         rl = self._revlogs.get(name)
       
   320         if rl is None:
       
   321             revlogname = '00manifesttree.i'
       
   322             if name != '':
       
   323                 revlogname = 'meta/%s/00manifest.i' % name
       
   324             rl = revlog.revlog(self._svfs, revlogname)
       
   325             self._revlogs[name] = rl
       
   326         return rl
       
   327 
       
   328     def getmissing(self, keys):
       
   329         missing = []
       
   330         for name, node in keys:
       
   331             mfrevlog = self._revlog(name)
       
   332             if node not in mfrevlog.nodemap:
       
   333                 missing.append((name, node))
       
   334 
       
   335         return missing
       
   336 
       
   337     def setrepacklinkrevrange(self, startrev, endrev):
       
   338         self._repackstartlinkrev = startrev
       
   339         self._repackendlinkrev = endrev
       
   340 
       
   341     def markledger(self, ledger, options=None):
       
   342         if options and options.get(constants.OPTION_PACKSONLY):
       
   343             return
       
   344         treename = ''
       
   345         rl = revlog.revlog(self._svfs, '00manifesttree.i')
       
   346         startlinkrev = self._repackstartlinkrev
       
   347         endlinkrev = self._repackendlinkrev
       
   348         for rev in pycompat.xrange(len(rl) - 1, -1, -1):
       
   349             linkrev = rl.linkrev(rev)
       
   350             if linkrev < startlinkrev:
       
   351                 break
       
   352             if linkrev > endlinkrev:
       
   353                 continue
       
   354             node = rl.node(rev)
       
   355             ledger.markdataentry(self, treename, node)
       
   356             ledger.markhistoryentry(self, treename, node)
       
   357 
       
   358         for path, encoded, size in self._store.datafiles():
       
   359             if path[:5] != 'meta/' or path[-2:] != '.i':
       
   360                 continue
       
   361 
       
   362             treename = path[5:-len('/00manifest.i')]
       
   363 
       
   364             rl = revlog.revlog(self._svfs, path)
       
   365             for rev in pycompat.xrange(len(rl) - 1, -1, -1):
       
   366                 linkrev = rl.linkrev(rev)
       
   367                 if linkrev < startlinkrev:
       
   368                     break
       
   369                 if linkrev > endlinkrev:
       
   370                     continue
       
   371                 node = rl.node(rev)
       
   372                 ledger.markdataentry(self, treename, node)
       
   373                 ledger.markhistoryentry(self, treename, node)
       
   374 
       
   375     def cleanup(self, ledger):
       
   376         pass