changegroup: differentiate between fulltext and diff based deltas
author Gregory Szorc <gregory.szorc@gmail.com>
Wed, 08 Aug 2018 16:01:26 -0700
changeset 39016 39b8277e2115
parent 39015 ad9eccedb379
child 39017 ef3d3a2f9aa5
changegroup: differentiate between fulltext and diff based deltas

Previously, revisiondelta encoded a delta and an optional prefix containing a delta header. The underlying code could populate the delta with either a real delta or a fulltext revision.

Following the theme of wanting to defer serialization of revision data to the changegroup format as long as possible, it seems prudent for the revision delta instance to capture what type of data is being represented. This could possibly allow us to encode revision data differently in the future. But for the short term, it makes the behavior of a revisiondelta more explicit.

Differential Revision: https://phab.mercurial-scm.org/D4213
mercurial/changegroup.py
--- a/mercurial/changegroup.py	Wed Aug 08 15:28:22 2018 -0700
+++ b/mercurial/changegroup.py	Wed Aug 08 16:01:26 2018 -0700
@@ -507,6 +507,8 @@
 
     Captured data is sufficient to serialize the delta into multiple
     formats.
+
+    ``revision`` and ``delta`` are mutually exclusive.
     """
     # 20 byte node of this revision.
     node = attr.ib()
@@ -519,17 +521,40 @@
     linknode = attr.ib()
     # 2 bytes of flags to apply to revision data.
     flags = attr.ib()
-    # Iterable of chunks holding raw delta data.
-    deltachunks = attr.ib()
+    # Size of base revision this delta is against. May be None if
+    # basenode is nullid.
+    baserevisionsize = attr.ib()
+    # Raw fulltext revision data.
+    revision = attr.ib()
+    # Delta between the basenode and node.
+    delta = attr.ib()
 
 def _revisiondeltatochunks(delta, headerfn):
     """Serialize a revisiondelta to changegroup chunks."""
+
+    # The captured revision delta may be encoded as a delta against
+    # a base revision or as a full revision. The changegroup format
+    # requires that everything on the wire be deltas. So for full
+    # revisions, we need to invent a header that says to rewrite
+    # data.
+
+    if delta.delta is not None:
+        prefix, data = b'', delta.delta
+    elif delta.basenode == nullid:
+        data = delta.revision
+        prefix = mdiff.trivialdiffheader(len(data))
+    else:
+        data = delta.revision
+        prefix = mdiff.replacediffheader(delta.baserevisionsize,
+                                         len(data))
+
     meta = headerfn(delta)
-    l = len(meta) + sum(len(x) for x in delta.deltachunks)
-    yield chunkheader(l)
+
+    yield chunkheader(len(meta) + len(prefix) + len(data))
     yield meta
-    for x in delta.deltachunks:
-        yield x
+    if prefix:
+        yield prefix
+    yield data
 
 def _sortnodesnormal(store, nodes, reorder):
     """Sort nodes for changegroup generation and turn into revnums."""
@@ -568,22 +593,24 @@
     p1, p2 = store.parentrevs(rev)
     base = deltaparentfn(store, rev, p1, p2, prev)
 
-    prefix = ''
+    revision = None
+    delta = None
+    baserevisionsize = None
+
     if store.iscensored(base) or store.iscensored(rev):
         try:
-            delta = store.revision(node, raw=True)
+            revision = store.revision(node, raw=True)
         except error.CensoredNodeError as e:
-            delta = e.tombstone
-        if base == nullrev:
-            prefix = mdiff.trivialdiffheader(len(delta))
-        else:
-            baselen = store.rawsize(base)
-            prefix = mdiff.replacediffheader(baselen, len(delta))
+            revision = e.tombstone
+
+        if base != nullrev:
+            baserevisionsize = store.rawsize(base)
+
     elif base == nullrev:
-        delta = store.revision(node, raw=True)
-        prefix = mdiff.trivialdiffheader(len(delta))
+        revision = store.revision(node, raw=True)
     else:
         delta = store.revdiff(base, rev)
+
     p1n, p2n = store.parents(node)
 
     return revisiondelta(
@@ -593,7 +620,9 @@
         basenode=store.node(base),
         linknode=linknode,
         flags=store.flags(rev),
-        deltachunks=(prefix, delta),
+        baserevisionsize=baserevisionsize,
+        revision=revision,
+        delta=delta,
     )
 
 def _revisiondeltanarrow(cl, store, ischangelog, rev, linkrev,
@@ -677,8 +706,6 @@
     flags |= revlog.REVIDX_ELLIPSIS
 
     # TODO: try and actually send deltas for ellipsis data blocks
-    data = store.revision(n)
-    diffheader = mdiff.trivialdiffheader(len(data))
 
     return revisiondelta(
         node=n,
@@ -687,7 +714,9 @@
         basenode=nullid,
         linknode=linknode,
         flags=flags,
-        deltachunks=(diffheader, data),
+        baserevisionsize=None,
+        revision=store.revision(n),
+        delta=None,
     )
 
 def deltagroup(repo, revs, store, ischangelog, lookup, deltaparentfn,