mercurial/copies.py
changeset 34179 036d47d7cf39
parent 34078 b4b196092cc3
child 34288 fc3b8483c6cb
--- a/mercurial/copies.py	Fri Jun 30 03:36:46 2017 +0200
+++ b/mercurial/copies.py	Sun Sep 03 03:49:15 2017 +0530
@@ -7,7 +7,9 @@
 
 from __future__ import absolute_import
 
+import collections
 import heapq
+import os
 
 from . import (
     match as matchmod,
@@ -364,6 +366,8 @@
     # rebase.
     if copytracing == 'off':
         return {}, {}, {}, {}, {}
+    elif copytracing == 'heuristics':
+        return _heuristicscopytracing(repo, c1, c2, base)
     else:
         return _fullcopytracing(repo, c1, c2, base)
 
@@ -599,6 +603,94 @@
 
     return copy, movewithdir, diverge, renamedelete, dirmove
 
+def _heuristicscopytracing(repo, c1, c2, base):
+    """ Fast copytracing using filename heuristics
+
+    Assumes that moves or renames are of following two types:
+
+    1) Inside a directory only (same directory name but different filenames)
+    2) Move from one directory to another
+                    (same filenames but different directory names)
+
+    Works only when there are no merge commits in the "source branch".
+    Source branch is commits from base up to c2 not including base.
+
+    If merge is involved it fallbacks to _fullcopytracing().
+
+    Can be used by setting the following config:
+
+        [experimental]
+        copytrace = heuristics
+    """
+
+    if c1.rev() is None:
+        c1 = c1.p1()
+    if c2.rev() is None:
+        c2 = c2.p1()
+
+    copies = {}
+
+    changedfiles = set()
+    m1 = c1.manifest()
+    if not repo.revs('%d::%d', base.rev(), c2.rev()):
+        # If base is not in c2 branch, we switch to fullcopytracing
+        repo.ui.debug("switching to full copytracing as base is not "
+                      "an ancestor of c2\n")
+        return _fullcopytracing(repo, c1, c2, base)
+
+    ctx = c2
+    while ctx != base:
+        if len(ctx.parents()) == 2:
+            # To keep things simple let's not handle merges
+            repo.ui.debug("switching to full copytracing because of merges\n")
+            return _fullcopytracing(repo, c1, c2, base)
+        changedfiles.update(ctx.files())
+        ctx = ctx.p1()
+
+    cp = _forwardcopies(base, c2)
+    for dst, src in cp.iteritems():
+        if src in m1:
+            copies[dst] = src
+
+    # file is missing if it isn't present in the destination, but is present in
+    # the base and present in the source.
+    # Presence in the base is important to exclude added files, presence in the
+    # source is important to exclude removed files.
+    missingfiles = filter(lambda f: f not in m1 and f in base and f in c2,
+                          changedfiles)
+
+    if missingfiles:
+        basenametofilename = collections.defaultdict(list)
+        dirnametofilename = collections.defaultdict(list)
+
+        for f in m1.filesnotin(base.manifest()):
+            basename = os.path.basename(f)
+            dirname = os.path.dirname(f)
+            basenametofilename[basename].append(f)
+            dirnametofilename[dirname].append(f)
+
+        # in case of a rebase/graft, base may not be a common ancestor
+        anc = c1.ancestor(c2)
+
+        for f in missingfiles:
+            basename = os.path.basename(f)
+            dirname = os.path.dirname(f)
+            samebasename = basenametofilename[basename]
+            samedirname = dirnametofilename[dirname]
+            movecandidates = samebasename + samedirname
+            # f is guaranteed to be present in c2, that's why
+            # c2.filectx(f) won't fail
+            f2 = c2.filectx(f)
+            for candidate in movecandidates:
+                f1 = c1.filectx(candidate)
+                if _related(f1, f2, anc.rev()):
+                    # if there are a few related copies then we'll merge
+                    # changes into all of them. This matches the behaviour
+                    # of upstream copytracing
+                    copies[candidate] = f
+
+    return copies, {}, {}, {}, {}
+
 def _related(f1, f2, limit):
     """return True if f1 and f2 filectx have a common ancestor