copies: do copy tracing based on ctx.p[12]copies() if configured
author Martin von Zweigbergk <martinvonz@google.com>
Tue, 19 Feb 2019 15:42:45 -0800
changeset 41756 49ad315b39ee
parent 41755 a4358f7345b4
child 41757 062e9ebc3215
copies: do copy tracing based on ctx.p[12]copies() if configured This adds an option to do copy tracing in a changeset-optimized way. If the metadata is stored in filelogs, this is obviously going to be suboptimal. The point is that it provides a way of transitioning to changeset-stored metadata. Some of the tests behave a little differently, but they all seem reasonable to me. The config option may very well be renamed later when it's clearer what options we want and how they will behave. When the test suite is run with --extra-config-opt to use the new copy tracing, all tests pass, besides test-copies.t (which fails in the same way as you can see in this patch). `hg debugpathcopies 4.0 4.8` reports 82 copies. With this option enabled, the only difference is this: -mercurial/pure/bdiff.py -> mercurial/cffi/bdiff.py +setup_bdiff_cffi.py -> mercurial/cffi/bdiff.py I believe that happened because it was renamed in different ways on different sides of a merge and the new algorithm arbitrarily prefers copies that happened on p1. The runtime is about 0.85 seconds with the old copy tracing and 5.7 seconds with the new copy tracing. That's kind of slow, but actually better than I had expected. Differential Revision: https://phab.mercurial-scm.org/D5991
mercurial/configitems.py
mercurial/copies.py
tests/test-copies.t
--- a/mercurial/configitems.py	Fri Jan 18 13:13:30 2019 -0800
+++ b/mercurial/configitems.py	Tue Feb 19 15:42:45 2019 -0800
@@ -482,6 +482,9 @@
 coreconfigitem('experimental', 'copytrace.sourcecommitlimit',
     default=100,
 )
+coreconfigitem('experimental', 'copies.read-from',
+    default="filelog-only",
+)
 coreconfigitem('experimental', 'crecordtest',
     default=None,
 )
--- a/mercurial/copies.py	Fri Jan 18 13:13:30 2019 -0800
+++ b/mercurial/copies.py	Tue Feb 19 15:42:45 2019 -0800
@@ -166,6 +166,10 @@
     # files might have to be traced back to the fctx parent of the last
     # one-side-only changeset, but not further back than that
     repo = a._repo
+
+    if repo.ui.config('experimental', 'copies.read-from') == 'compatibility':
+        return _changesetforwardcopies(a, b, match)
+
     debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
     dbg = repo.ui.debug
     if debug:
@@ -216,6 +220,76 @@
                 % (util.timer() - start))
     return cm
 
+def _changesetforwardcopies(a, b, match):
+    if a.rev() == node.nullrev:
+        return {}
+
+    repo = a.repo()
+    children = {}
+    cl = repo.changelog
+    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
+    for r in missingrevs:
+        for p in cl.parentrevs(r):
+            if p == node.nullrev:
+                continue
+            if p not in children:
+                children[p] = [r]
+            else:
+                children[p].append(r)
+
+    roots = set(children) - set(missingrevs)
+    # 'work' contains 3-tuples of (revision number, parent number, copies).
+    # The parent number is only used for knowing which parent the copies dict
+    # came from.
+    work = [(r, 1, {}) for r in roots]
+    heapq.heapify(work)
+    while work:
+        r, i1, copies1 = heapq.heappop(work)
+        if work and work[0][0] == r:
+            # We are tracing copies from both parents
+            r, i2, copies2 = heapq.heappop(work)
+            copies = {}
+            ctx = repo[r]
+            p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
+            allcopies = set(copies1) | set(copies2)
+            # TODO: perhaps this filtering should be done as long as ctx
+            # is a merge, whether or not we're tracing from both parents.
+            for dst in allcopies:
+                if not match(dst):
+                    continue
+                if dst not in copies2:
+                    # Copied on p1 side: mark as copy from p1 side if it didn't
+                    # already exist on p2 side
+                    if dst not in p2man:
+                        copies[dst] = copies1[dst]
+                elif dst not in copies1:
+                    # Copied on p2 side: mark as copy from p2 side if it didn't
+                    # already exist on p1 side
+                    if dst not in p1man:
+                        copies[dst] = copies2[dst]
+                else:
+                    # Copied on both sides: mark as copy from p1 side
+                    copies[dst] = copies1[dst]
+        else:
+            copies = copies1
+        if r == b.rev():
+            return copies
+        for c in children[r]:
+            childctx = repo[c]
+            if r == childctx.p1().rev():
+                parent = 1
+                childcopies = childctx.p1copies()
+            else:
+                assert r == childctx.p2().rev()
+                parent = 2
+                childcopies = childctx.p2copies()
+            if not match.always():
+                childcopies = {dst: src for dst, src in childcopies.items()
+                               if match(dst)}
+            childcopies = _chain(a, childctx, copies, childcopies)
+            heapq.heappush(work, (c, parent, childcopies))
+    assert False
+
 def _forwardcopies(a, b, match=None):
     """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
 
--- a/tests/test-copies.t	Fri Jan 18 13:13:30 2019 -0800
+++ b/tests/test-copies.t	Tue Feb 19 15:42:45 2019 -0800
@@ -1,9 +1,17 @@
+#testcases filelog compatibility
 
   $ cat >> $HGRCPATH << EOF
   > [alias]
   > l = log -G -T '{rev} {desc}\n{files}\n'
   > EOF
 
+#if compatibility
+  $ cat >> $HGRCPATH << EOF
+  > [experimental]
+  > copies.read-from = compatibility
+  > EOF
+#endif
+
   $ REPONUM=0
   $ newrepo() {
   >     cd $TESTTMP
@@ -338,7 +346,7 @@
   $ hg debugpathcopies 1 2
   x -> z
   $ hg debugpathcopies 0 2
-  x -> z
+  x -> z (filelog !)
 
 Copy file that exists on both sides of the merge, different content
   $ newrepo
@@ -476,7 +484,8 @@
   $ hg debugpathcopies 1 4
   $ hg debugpathcopies 2 4
   $ hg debugpathcopies 0 4
-  x -> z
+  x -> z (filelog !)
+  y -> z (compatibility !)
   $ hg debugpathcopies 1 5
   $ hg debugpathcopies 2 5
   $ hg debugpathcopies 0 5