repository: establish API for emitting revision deltas
authorGregory Szorc <gregory.szorc@gmail.com>
Thu, 09 Aug 2018 16:02:14 -0700
changeset 39231 b41d023a412a
parent 39230 b518d495a560
child 39232 0a5b20c107a6
repository: establish API for emitting revision deltas With our revision delta and revision delta request interfaces defined, it is now time to define a method on storage interfaces for using them. So far, the only storage interface that is well-defined and used is file storage. So that is the only interface we need to add a method on. We define an ``emitrevisiondeltas()`` method that takes an iterable of ``irevisiondeltarequest``s and turns them into ``irevisiondelta`` instances. changegroup._handlerevisiondeltarequest() and the looping logic from changegroup.deltagroup() has effectively been moved to revlog.emitrevisiondeltas(). Our filelog wrapper class proxies its emitrevisiondeltas() to the internal revlog instance. The simple store test extension used to verify sanity of storage abstractions has also implemented emitrevisiondeltas() for file storage and the test harness when run with this extension doesn't seem to exhibit any regressions. Rather than create a shared type to represent revision deltas, each storage backend has its own type and the class name identifies where the revision delta was derived from. Differential Revision: https://phab.mercurial-scm.org/D4226
mercurial/changegroup.py
mercurial/filelog.py
mercurial/repository.py
mercurial/revlog.py
tests/simplestorerepo.py
tests/test-check-interfaces.py
--- a/mercurial/changegroup.py	Thu Aug 09 15:40:14 2018 -0700
+++ b/mercurial/changegroup.py	Thu Aug 09 16:02:14 2018 -0700
@@ -31,7 +31,6 @@
     phases,
     pycompat,
     repository,
-    revlog,
     util,
 )
 
@@ -512,19 +511,6 @@
     basenode = attr.ib()
     ellipsis = attr.ib(default=False)
 
-@interfaceutil.implementer(repository.irevisiondelta)
-@attr.s(slots=True, frozen=True)
-class revisiondelta(object):
-    node = attr.ib()
-    p1node = attr.ib()
-    p2node = attr.ib()
-    basenode = attr.ib()
-    linknode = attr.ib()
-    flags = attr.ib()
-    baserevisionsize = attr.ib()
-    revision = attr.ib()
-    delta = attr.ib()
-
 def _revisiondeltatochunks(delta, headerfn):
     """Serialize a revisiondelta to changegroup chunks."""
 
@@ -583,77 +569,6 @@
     key = lambda n: cl.rev(lookup(n))
     return [store.rev(n) for n in sorted(nodes, key=key)]
 
-def _handlerevisiondeltarequest(store, request, prevnode):
-    """Obtain a revisiondelta from a revisiondeltarequest"""
-
-    node = request.node
-    rev = store.rev(node)
-
-    # Requesting a full revision.
-    if request.basenode == nullid:
-        baserev = nullrev
-    # Requesting an explicit revision.
-    elif request.basenode is not None:
-        baserev = store.rev(request.basenode)
-    # Allowing us to choose.
-    else:
-        p1, p2 = store.parentrevs(rev)
-        dp = store.deltaparent(rev)
-
-        if dp == nullrev and store.storedeltachains:
-            # Avoid sending full revisions when delta parent is null. Pick prev
-            # in that case. It's tempting to pick p1 in this case, as p1 will
-            # be smaller in the common case. However, computing a delta against
-            # p1 may require resolving the raw text of p1, which could be
-            # expensive. The revlog caches should have prev cached, meaning
-            # less CPU for changegroup generation. There is likely room to add
-            # a flag and/or config option to control this behavior.
-            baserev = store.rev(prevnode)
-        elif dp == nullrev:
-            # revlog is configured to use full snapshot for a reason,
-            # stick to full snapshot.
-            baserev = nullrev
-        elif dp not in (p1, p2, store.rev(prevnode)):
-            # Pick prev when we can't be sure remote has the base revision.
-            baserev = store.rev(prevnode)
-        else:
-            baserev = dp
-
-        if baserev != nullrev and not store.candelta(baserev, rev):
-            baserev = nullrev
-
-    revision = None
-    delta = None
-    baserevisionsize = None
-
-    if store.iscensored(baserev) or store.iscensored(rev):
-        try:
-            revision = store.revision(node, raw=True)
-        except error.CensoredNodeError as e:
-            revision = e.tombstone
-
-        if baserev != nullrev:
-            baserevisionsize = store.rawsize(baserev)
-
-    elif baserev == nullrev:
-        revision = store.revision(node, raw=True)
-    else:
-        delta = store.revdiff(baserev, rev)
-
-    extraflags = revlog.REVIDX_ELLIPSIS if request.ellipsis else 0
-
-    return revisiondelta(
-        node=node,
-        p1node=request.p1node,
-        p2node=request.p2node,
-        linknode=request.linknode,
-        basenode=store.node(baserev),
-        flags=store.flags(rev) | extraflags,
-        baserevisionsize=baserevisionsize,
-        revision=revision,
-        delta=delta,
-    )
-
 def _makenarrowdeltarequest(cl, store, ischangelog, rev, node, linkrev,
                             linknode, clrevtolocalrev, fullclnodes,
                             precomputedellipsis):
@@ -832,17 +747,12 @@
         progress = repo.ui.makeprogress(_('bundling'), unit=units,
                                         total=len(requests))
 
-    prevnode = store.node(revs[0])
-    for i, request in enumerate(requests):
+    for i, delta in enumerate(store.emitrevisiondeltas(requests)):
         if progress:
             progress.update(i + 1)
 
-        delta = _handlerevisiondeltarequest(store, request, prevnode)
-
         yield delta
 
-        prevnode = request.node
-
     if progress:
         progress.complete()
 
--- a/mercurial/filelog.py	Thu Aug 09 15:40:14 2018 -0700
+++ b/mercurial/filelog.py	Thu Aug 09 16:02:14 2018 -0700
@@ -95,6 +95,9 @@
     def revdiff(self, rev1, rev2):
         return self._revlog.revdiff(rev1, rev2)
 
+    def emitrevisiondeltas(self, requests):
+        return self._revlog.emitrevisiondeltas(requests)
+
     def addrevision(self, revisiondata, transaction, linkrev, p1, p2,
                     node=None, flags=revlog.REVIDX_DEFAULT_FLAGS,
                     cachedelta=None):
--- a/mercurial/repository.py	Thu Aug 09 15:40:14 2018 -0700
+++ b/mercurial/repository.py	Thu Aug 09 16:02:14 2018 -0700
@@ -621,6 +621,30 @@
         revision data.
         """
 
+    def emitrevisiondeltas(requests):
+        """Produce ``irevisiondelta`` from ``irevisiondeltarequest``s.
+
+        Given an iterable of objects conforming to the ``irevisiondeltarequest``
+        interface, emits objects conforming to the ``irevisiondelta``
+        interface.
+
+        This method is a generator.
+
+        ``irevisiondelta`` should be emitted in the same order of
+        ``irevisiondeltarequest`` that was passed in.
+
+        The emitted objects MUST conform by the results of
+        ``irevisiondeltarequest``. Namely, they must respect any requests
+        for building a delta from a specific ``basenode`` if defined.
+
+        When sending deltas, implementations must take into account whether
+        the client has the base delta before encoding a delta against that
+        revision. A revision encountered previously in ``requests`` is
+        always a suitable base revision. An example of a bad delta is a delta
+        against a non-ancestor revision. Another example of a bad delta is a
+        delta against a censored revision.
+        """
+
 class ifilemutation(interfaceutil.Interface):
     """Storage interface for mutation events of a tracked file."""
 
--- a/mercurial/revlog.py	Thu Aug 09 15:40:14 2018 -0700
+++ b/mercurial/revlog.py	Thu Aug 09 16:02:14 2018 -0700
@@ -45,10 +45,12 @@
     mdiff,
     policy,
     pycompat,
+    repository,
     templatefilters,
     util,
 )
 from .utils import (
+    interfaceutil,
     stringutil,
 )
 
@@ -821,6 +823,19 @@
     cachedelta = attr.ib()
     flags = attr.ib()
 
+@interfaceutil.implementer(repository.irevisiondelta)
+@attr.s(slots=True, frozen=True)
+class revlogrevisiondelta(object):
+    node = attr.ib()
+    p1node = attr.ib()
+    p2node = attr.ib()
+    basenode = attr.ib()
+    linknode = attr.ib()
+    flags = attr.ib()
+    baserevisionsize = attr.ib()
+    revision = attr.ib()
+    delta = attr.ib()
+
 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
@@ -2950,6 +2965,87 @@
             res.append(self.datafile)
         return res
 
+    def emitrevisiondeltas(self, requests):
+        frev = self.rev
+
+        prevrev = None
+        for request in requests:
+            node = request.node
+            rev = frev(node)
+
+            if prevrev is None:
+                prevrev = self.index[rev][5]
+
+            # Requesting a full revision.
+            if request.basenode == nullid:
+                baserev = nullrev
+            # Requesting an explicit revision.
+            elif request.basenode is not None:
+                baserev = frev(request.basenode)
+            # Allowing us to choose.
+            else:
+                p1rev, p2rev = self.parentrevs(rev)
+                deltaparentrev = self.deltaparent(rev)
+
+                # Avoid sending full revisions when delta parent is null. Pick
+                # prev in that case. It's tempting to pick p1 in this case, as
+                # p1 will be smaller in the common case. However, computing a
+                # delta against p1 may require resolving the raw text of p1,
+                # which could be expensive. The revlog caches should have prev
+                # cached, meaning less CPU for delta generation. There is
+                # likely room to add a flag and/or config option to control this
+                # behavior.
+                if deltaparentrev == nullrev and self.storedeltachains:
+                    baserev = prevrev
+
+                # Revlog is configured to use full snapshot for a reason.
+                # Stick to full snapshot.
+                elif deltaparentrev == nullrev:
+                    baserev = nullrev
+
+                # Pick previous when we can't be sure the base is available
+                # on consumer.
+                elif deltaparentrev not in (p1rev, p2rev, prevrev):
+                    baserev = prevrev
+                else:
+                    baserev = deltaparentrev
+
+                if baserev != nullrev and not self.candelta(baserev, rev):
+                    baserev = nullrev
+
+            revision = None
+            delta = None
+            baserevisionsize = None
+
+            if self.iscensored(baserev) or self.iscensored(rev):
+                try:
+                    revision = self.revision(node, raw=True)
+                except error.CensoredNodeError as e:
+                    revision = e.tombstone
+
+                if baserev != nullrev:
+                    baserevisionsize = self.rawsize(baserev)
+
+            elif baserev == nullrev:
+                revision = self.revision(node, raw=True)
+            else:
+                delta = self.revdiff(baserev, rev)
+
+            extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0
+
+            yield revlogrevisiondelta(
+                node=node,
+                p1node=request.p1node,
+                p2node=request.p2node,
+                linknode=request.linknode,
+                basenode=self.node(baserev),
+                flags=self.flags(rev) | extraflags,
+                baserevisionsize=baserevisionsize,
+                revision=revision,
+                delta=delta)
+
+            prevrev = rev
+
     DELTAREUSEALWAYS = 'always'
     DELTAREUSESAMEREVS = 'samerevs'
     DELTAREUSENEVER = 'never'
--- a/tests/simplestorerepo.py	Thu Aug 09 15:40:14 2018 -0700
+++ b/tests/simplestorerepo.py	Thu Aug 09 16:02:14 2018 -0700
@@ -22,6 +22,7 @@
     nullrev,
 )
 from mercurial.thirdparty import (
+    attr,
     cbor,
 )
 from mercurial import (
@@ -60,6 +61,19 @@
     if not isinstance(rev, int):
         raise ValueError('expected int')
 
+@interfaceutil.implementer(repository.irevisiondelta)
+@attr.s(slots=True, frozen=True)
+class simplestorerevisiondelta(object):
+    node = attr.ib()
+    p1node = attr.ib()
+    p2node = attr.ib()
+    basenode = attr.ib()
+    linknode = attr.ib()
+    flags = attr.ib()
+    baserevisionsize = attr.ib()
+    revision = attr.ib()
+    delta = attr.ib()
+
 @interfaceutil.implementer(repository.ifilestorage)
 class filestorage(object):
     """Implements storage for a tracked path.
@@ -500,6 +514,54 @@
         return mdiff.textdiff(self.revision(node1, raw=True),
                               self.revision(node2, raw=True))
 
+    def emitrevisiondeltas(self, requests):
+        for request in requests:
+            node = request.node
+            rev = self.rev(node)
+
+            if request.basenode == nullid:
+                baserev = nullrev
+            elif request.basenode is not None:
+                baserev = self.rev(request.basenode)
+            else:
+                # This is a test extension and we can do simple things
+                # for choosing a delta parent.
+                baserev = self.deltaparent(rev)
+
+                if baserev != nullrev and not self.candelta(baserev, rev):
+                    baserev = nullrev
+
+            revision = None
+            delta = None
+            baserevisionsize = None
+
+            if self.iscensored(baserev) or self.iscensored(rev):
+                try:
+                    revision = self.revision(node, raw=True)
+                except error.CensoredNodeError as e:
+                    revision = e.tombstone
+
+                if baserev != nullrev:
+                    baserevisionsize = self.rawsize(baserev)
+
+            elif baserev == nullrev:
+                revision = self.revision(node, raw=True)
+            else:
+                delta = self.revdiff(baserev, rev)
+
+            extraflags = revlog.REVIDX_ELLIPSIS if request.ellipsis else 0
+
+            yield simplestorerevisiondelta(
+                node=node,
+                p1node=request.p1node,
+                p2node=request.p2node,
+                linknode=request.linknode,
+                basenode=self.node(baserev),
+                flags=self.flags(rev) | extraflags,
+                baserevisionsize=baserevisionsize,
+                revision=revision,
+                delta=delta)
+
     def headrevs(self):
         # Assume all revisions are heads by default.
         revishead = {rev: True for rev in self._indexbyrev}
--- a/tests/test-check-interfaces.py	Thu Aug 09 15:40:14 2018 -0700
+++ b/tests/test-check-interfaces.py	Thu Aug 09 16:02:14 2018 -0700
@@ -29,6 +29,7 @@
     manifest,
     pycompat,
     repository,
+    revlog,
     sshpeer,
     statichttprepo,
     ui as uimod,
@@ -198,11 +199,11 @@
     checkzobject(mctx.read())
 
     ziverify.verifyClass(repository.irevisiondelta,
-                         changegroup.revisiondelta)
+                         revlog.revlogrevisiondelta)
     ziverify.verifyClass(repository.irevisiondeltarequest,
                          changegroup.revisiondeltarequest)
 
-    rd = changegroup.revisiondelta(
+    rd = revlog.revlogrevisiondelta(
         node=b'',
         p1node=b'',
         p2node=b'',