copies: rewrite copy detection for non-merge users
author Matt Mackall <mpm@selenic.com>
Wed, 04 Jan 2012 17:55:30 -0600
changeset 15775 91eb4512edd0
parent 15774 0bd17a4bed88
child 15776 55a85a55f020
copies: rewrite copy detection for non-merge users The existing copy detection API was designed with merge in mind and was ill-suited for doing status/diff. The new pathcopies implementation gives more accurate, easier-to-use results for comparing two revisions, and is much simpler to understand. Test notes: - test-mv-cp-st-diff.t results now find more renames in the reverse direction - test-mq-merge.t was always wrong and duplicated a copy in the diff that was already present in one of the parent revisions
mercurial/commands.py
mercurial/copies.py
tests/test-mq-merge.t
tests/test-mv-cp-st-diff.t
--- a/mercurial/commands.py	Wed Jan 04 15:48:02 2012 -0600
+++ b/mercurial/commands.py	Wed Jan 04 17:55:30 2012 -0600
@@ -5206,17 +5206,7 @@
     changestates = zip(states, 'MAR!?IC', stat)
 
     if (opts.get('all') or opts.get('copies')) and not opts.get('no_status'):
-        ctx1 = repo[node1]
-        ctx2 = repo[node2]
-        added = stat[1]
-        if node2 is None:
-            added = stat[0] + stat[1] # merged?
-
-        for k, v in copies.pathcopies(ctx1, ctx2).iteritems():
-            if k in added:
-                copy[k] = v
-            elif v in added:
-                copy[v] = k
+        copy = copies.pathcopies(repo[node1], repo[node2])
 
     for state, char, files in changestates:
         if state in show:
--- a/mercurial/copies.py	Wed Jan 04 15:48:02 2012 -0600
+++ b/mercurial/copies.py	Wed Jan 04 17:55:30 2012 -0600
@@ -84,8 +84,89 @@
         return None
     return limit
 
-def pathcopies(c1, c2):
-    return mergecopies(c1._repo, c1, c2, c1._repo["null"], False)[0]
+def _chain(src, dst, a, b):
+    '''chain two sets of copies a->b'''
+    t = a.copy()
+    for k, v in b.iteritems():
+        if v in t:
+            # found a chain
+            if t[v] != k:
+                # file wasn't renamed back to itself
+                t[k] = t[v]
+            if v not in dst:
+                # chain was a rename, not a copy
+                del t[v]
+        if v in src:
+            # file is a copy of an existing file
+            t[k] = v
+    return t
+
+def _tracefile(fctx, actx):
+    '''return file context that is the ancestor of fctx present in actx'''
+    stop = actx.rev()
+    am = actx.manifest()
+
+    for f in fctx.ancestors():
+        if am.get(f.path(), None) == f.filenode():
+            return f
+        if f.rev() < stop:
+            return None
+
+def _dirstatecopies(d):
+    ds = d._repo.dirstate
+    c = ds.copies().copy()
+    for k in c.keys():
+        if ds[k] not in 'anm':
+            del c[k]
+    return c
+
+def _forwardcopies(a, b):
+    '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
+
+    # check for working copy
+    w = None
+    if b.rev() is None:
+        w = b
+        b = w.p1()
+        if a == b:
+            # short-circuit to avoid issues with merge states
+            return _dirstatecopies(w)
+
+    # find where new files came from
+    # we currently don't try to find where old files went, too expensive
+    # this means we can miss a case like 'hg rm b; hg cp a b'
+    cm = {}
+    for f in b:
+        if f not in a:
+            ofctx = _tracefile(b[f], a)
+            if ofctx:
+                cm[f] = ofctx.path()
+
+    # combine copies from dirstate if necessary
+    if w is not None:
+        cm = _chain(a, w, cm, _dirstatecopies(w))
+
+    return cm
+
+def _backwardcopies(a, b):
+    # because the forward mapping is 1:n, we can lose renames here
+    # in particular, we find renames better than copies
+    f = _forwardcopies(b, a)
+    r = {}
+    for k, v in f.iteritems():
+        r[v] = k
+    return r
+
+def pathcopies(x, y):
+    '''find {dst@y: src@x} copy mapping for directed compare'''
+    if x == y or not x or not y:
+        return {}
+    a = y.ancestor(x)
+    if a == x:
+        return _forwardcopies(x, y)
+    if a == y:
+        return _backwardcopies(x, y)
+    return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y))
 
 def mergecopies(repo, c1, c2, ca, checkdirs=True):
     """
--- a/tests/test-mq-merge.t	Wed Jan 04 15:48:02 2012 -0600
+++ b/tests/test-mq-merge.t	Wed Jan 04 17:55:30 2012 -0600
@@ -149,13 +149,11 @@
   -b
   +a
   +c
-  diff --git a/a b/aa
-  copy from a
-  copy to aa
-  --- a/a
+  diff --git a/aa b/aa
+  new file mode 100644
+  --- /dev/null
   +++ b/aa
-  @@ -1,1 +1,1 @@
-  -b
+  @@ -0,0 +1,1 @@
   +a
 
 Check patcha2 is still a regular patch:
--- a/tests/test-mv-cp-st-diff.t	Wed Jan 04 15:48:02 2012 -0600
+++ b/tests/test-mv-cp-st-diff.t	Wed Jan 04 17:55:30 2012 -0600
@@ -560,6 +560,7 @@
   
   - parent to root: --rev . --rev 0
   M a
+    b
   R b
   
   diff --git a/a b/a
@@ -611,6 +612,7 @@
   
   - parent to branch: --rev . --rev 2
   M a
+    b
   A x/y
   R b
   
@@ -906,6 +908,7 @@
   
   - parent to root: --rev . --rev 0
   M a
+    b
   R b
   R c
   
@@ -975,6 +978,7 @@
   
   - parent to branch: --rev . --rev 2
   M a
+    b
   A x/y
   R b
   R c