changegroup: capture revision delta in a data structure
authorGregory Szorc <gregory.szorc@gmail.com>
Fri, 03 Aug 2018 10:05:26 -0700
changeset 38893 23d582caae30
parent 38892 eb022ce9e505
child 38894 19344024a8e1
changegroup: capture revision delta in a data structure The current changegroup generation code is tightly coupled to the revlog API. This tight coupling makes it difficult to implement alternate storage backends without requiring a large surface area of the revlog API to be exposed. This is not desirable. In order to support changegroup generation with non-revlog storage, we'll need to abstract the concept of delta generation. This commit is the first step down that road. We introduce a data structure for representing a delta in a changegroup. The API still leaves a lot to be desired. But at least we now have separation between data and actions performed on it. As part of this, we tweak behavior slightly: we no longer concatenate the delta prefix with the metadata header. Instead, we track and emit the prefix as a separate chunk. This shouldn't have any meaningful impact since all the chunks just get sent to the wire, the compressor, etc. Because we're introducing a new object, this does add some overhead to changegroup execution. `hg perfchangegroupchangelog` on my clone of the Mercurial repo (~40,000 visible revisions in the changelog) slows down a bit: ! wall 1.268600 comb 1.270000 user 1.270000 sys 0.000000 (best of 8) ! wall 1.419479 comb 1.410000 user 1.410000 sys 0.000000 (best of 8) With for `hg bundle -t none-v2 -a /dev/null`: before: real 6.610 secs (user 6.460+0.000 sys 0.140+0.000) after: real 7.210 secs (user 7.060+0.000 sys 0.140+0.000) I plan to claw back this regression in future commits. And I may even do away with this data structure once the refactor is complete. For now, it makes things easier to comprehend. Differential Revision: https://phab.mercurial-scm.org/D4075
mercurial/changegroup.py
--- a/mercurial/changegroup.py	Thu Aug 02 16:36:40 2018 -0700
+++ b/mercurial/changegroup.py	Fri Aug 03 10:05:26 2018 -0700
@@ -19,6 +19,10 @@
     short,
 )
 
+from .thirdparty import (
+    attr,
+)
+
 from . import (
     dagutil,
     error,
@@ -494,6 +498,26 @@
             return d
         return readexactly(self._fh, n)
 
+@attr.s(slots=True, frozen=True)
+class revisiondelta(object):
+    """Describes a delta entry in a changegroup.
+
+    Captured data is sufficient to serialize the delta into multiple
+    formats.
+    """
+    # 20 byte node of this revision.
+    node = attr.ib()
+    # 20 byte nodes of parent revisions.
+    p1node = attr.ib()
+    p2node = attr.ib()
+    # 20 byte node of node this delta is against.
+    basenode = attr.ib()
+    # 20 byte node of changeset revision this delta is associated with.
+    linknode = attr.ib()
+    # 2 bytes of flags to apply to revision data.
+    flags = attr.ib()
+    # Iterable of chunks holding raw delta data.
+    deltachunks = attr.ib()
 
 class cg1packer(object):
     deltaheader = _CHANGEGROUPV1_DELTA_HEADER
@@ -899,13 +923,25 @@
 
     def revchunk(self, store, rev, prev, linknode):
         if util.safehasattr(self, 'full_nodes'):
-            fn = self._revchunknarrow
+            fn = self._revisiondeltanarrow
         else:
-            fn = self._revchunknormal
+            fn = self._revisiondeltanormal
+
+        delta = fn(store, rev, prev, linknode)
+        if not delta:
+            return
 
-        return fn(store, rev, prev, linknode)
+        meta = self.builddeltaheader(delta.node, delta.p1node, delta.p2node,
+                                     delta.basenode, delta.linknode,
+                                     delta.flags)
+        l = len(meta) + sum(len(x) for x in delta.deltachunks)
 
-    def _revchunknormal(self, store, rev, prev, linknode):
+        yield chunkheader(l)
+        yield meta
+        for x in delta.deltachunks:
+            yield x
+
+    def _revisiondeltanormal(self, store, rev, prev, linknode):
         node = store.node(rev)
         p1, p2 = store.parentrevs(rev)
         base = self.deltaparent(store, rev, p1, p2, prev)
@@ -927,16 +963,18 @@
         else:
             delta = store.revdiff(base, rev)
         p1n, p2n = store.parents(node)
-        basenode = store.node(base)
-        flags = store.flags(rev)
-        meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
-        meta += prefix
-        l = len(meta) + len(delta)
-        yield chunkheader(l)
-        yield meta
-        yield delta
 
-    def _revchunknarrow(self, store, rev, prev, linknode):
+        return revisiondelta(
+            node=node,
+            p1node=p1n,
+            p2node=p2n,
+            basenode=store.node(base),
+            linknode=linknode,
+            flags=store.flags(rev),
+            deltachunks=(prefix, delta),
+        )
+
+    def _revisiondeltanarrow(self, store, rev, prev, linknode):
         # build up some mapping information that's useful later. See
         # the local() nested function below.
         if not self.changelog_done:
@@ -950,9 +988,7 @@
         # This is a node to send in full, because the changeset it
         # corresponds to was a full changeset.
         if linknode in self.full_nodes:
-            for x in self._revchunknormal(store, rev, prev, linknode):
-                yield x
-            return
+            return self._revisiondeltanormal(store, rev, prev, linknode)
 
         # At this point, a node can either be one we should skip or an
         # ellipsis. If it's not an ellipsis, bail immediately.
@@ -1043,16 +1079,20 @@
         p1n, p2n = store.node(p1), store.node(p2)
         flags = store.flags(rev)
         flags |= revlog.REVIDX_ELLIPSIS
-        meta = self.builddeltaheader(
-            n, p1n, p2n, nullid, linknode, flags)
+
         # TODO: try and actually send deltas for ellipsis data blocks
         data = store.revision(n)
         diffheader = mdiff.trivialdiffheader(len(data))
-        l = len(meta) + len(diffheader) + len(data)
-        yield ''.join((chunkheader(l),
-                       meta,
-                       diffheader,
-                       data))
+
+        return revisiondelta(
+            node=n,
+            p1node=p1n,
+            p2node=p2n,
+            basenode=nullid,
+            linknode=linknode,
+            flags=flags,
+            deltachunks=(diffheader, data),
+        )
 
     def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
         # do nothing with basenode, it is implicitly the previous one in HG10