changegroup: fix file linkrevs during reorders (issue4462) stable
authorDurham Goode <durham@fb.com>
Thu, 20 Nov 2014 16:30:57 -0800
branchstable
changeset 23381 cc0ff93d0c0c
parent 23377 c00b156d6e76
child 23382 a81c76106d90
child 23383 7f8d27e1f862
changegroup: fix file linkrevs during reorders (issue4462)

Previously, if reorder was true during the creation of a changegroup bundle,
it was possible that the manifest and filelogs would be reordered such that
the resulting bundle filelog had a linkrev that pointed to a commit that was
not the earliest instance of the filelog revision. For example:

With commits:

0<-1<---3<-4
  \       /
   --2<---

If 2 and 3 added the same version of a file, and the manifests of 2 and 3 had
their order reversed but the changelog entries did not, it could produce a
filelog with linkrevs 0<-3 instead of 0<-2. That meant that if commit 3 was
stripped, the file data would be deleted from the repository and commit 2
would be corrupt (as would any future pulls that tried to build upon that
version of the file).

The fix is to make the linkrev fixup smarter. Previously it considered the
first manifest that added a file to be the first commit that added that file,
which is not true. Now, for every file revision we add to the bundle, we make
sure we attach it to the earliest applicable linkrev.
mercurial/changegroup.py
tests/test-generaldelta.t
--- a/mercurial/changegroup.py	Fri Nov 21 13:58:49 2014 +0800
+++ b/mercurial/changegroup.py	Thu Nov 20 16:30:57 2014 -0800
@@ -316,6 +316,7 @@
         # for progress output
         msgbundling = _('bundling')
 
+        clrevorder = {}
         mfs = {} # needed manifests
         fnodes = {} # needed file nodes
         changedfiles = set()
@@ -325,6 +326,7 @@
         # Returns the linkrev node (identity in the changelog case).
         def lookupcl(x):
             c = cl.read(x)
+            clrevorder[x] = len(clrevorder)
             changedfiles.update(c[3])
             # record the first changeset introducing this manifest version
             mfs.setdefault(c[0], x)
@@ -340,13 +342,16 @@
         # Returns the linkrev node (collected in lookupcl).
         def lookupmf(x):
             clnode = mfs[x]
-            if not fastpathlinkrev:
+            if not fastpathlinkrev or reorder:
                 mdata = mf.readfast(x)
                 for f, n in mdata.iteritems():
                     if f in changedfiles:
                         # record the first changeset introducing this filelog
                         # version
-                        fnodes.setdefault(f, {}).setdefault(n, clnode)
+                        fclnodes = fnodes.setdefault(f, {})
+                        fclnode = fclnodes.setdefault(n, clnode)
+                        if clrevorder[clnode] < clrevorder[fclnode]:
+                            fclnodes[n] = clnode
             return clnode
 
         mfnodes = self.prune(mf, mfs, commonrevs, source)
@@ -359,7 +364,7 @@
         needed = set(cl.rev(x) for x in clnodes)
 
         def linknodes(filerevlog, fname):
-            if fastpathlinkrev:
+            if fastpathlinkrev and not reorder:
                 llr = filerevlog.linkrev
                 def genfilenodes():
                     for r in filerevlog:
--- a/tests/test-generaldelta.t	Fri Nov 21 13:58:49 2014 +0800
+++ b/tests/test-generaldelta.t	Thu Nov 20 16:30:57 2014 -0800
@@ -22,3 +22,50 @@
   >>> gdsize = os.stat("gdrepo/.hg/store/00manifest.i").st_size
   >>> if regsize < gdsize:
   ...     print 'generaldata increased size of manifest'
+
+Verify rev reordering doesn't create invalid bundles (issue4462)
+This requires a commit tree that when pulled will reorder manifest revs such
+that the second manifest to create a file rev will be ordered before the first
+manifest to create that file rev. We also need to do a partial pull to ensure
+reordering happens. At the end we verify the linkrev points at the earliest
+commit.
+
+  $ hg init server --config format.generaldelta=True
+  $ cd server
+  $ touch a
+  $ hg commit -Aqm a
+  $ echo x > x
+  $ echo y > y
+  $ hg commit -Aqm xy
+  $ hg up -q '.^'
+  $ echo x > x
+  $ echo z > z
+  $ hg commit -Aqm xz
+  $ hg up -q 1
+  $ echo b > b
+  $ hg commit -Aqm b
+  $ hg merge -q 2
+  $ hg commit -Aqm merge
+  $ echo c > c
+  $ hg commit -Aqm c
+  $ hg log -G -T '{rev} {shortest(node)} {desc}'
+  @  5 ebb8 c
+  |
+  o    4 baf7 merge
+  |\
+  | o  3 a129 b
+  | |
+  o |  2 958c xz
+  | |
+  | o  1 f00c xy
+  |/
+  o  0 3903 a
+  
+  $ cd ..
+  $ hg init client
+  $ cd client
+  $ hg pull -q ../server -r 4
+  $ hg debugindex x
+     rev    offset  length   base linkrev nodeid       p1           p2
+       0         0       3      0       1 1406e7411862 000000000000 000000000000
+