addremove: add -s/--similarity option
author Vadim Gelfer <vadim.gelfer@gmail.com>
Fri, 18 Aug 2006 22:13:58 -0700
changeset 2958 ff3ea21a981a
parent 2957 6e062d9b188f
child 2959 7f5fc4b347de
addremove: add -s/--similarity option; progress on issue 295.
mercurial/cmdutil.py
mercurial/commands.py
tests/test-addremove
tests/test-addremove.out
--- a/mercurial/cmdutil.py	Fri Aug 18 21:18:01 2006 -0700
+++ b/mercurial/cmdutil.py	Fri Aug 18 22:13:58 2006 -0700
@@ -8,7 +8,7 @@
 from demandload import demandload
 from node import *
 from i18n import gettext as _
-demandload(globals(), 'util')
+demandload(globals(), 'mdiff util')
 demandload(globals(), 'os sys')
 
 def make_filename(repo, pat, node,
@@ -93,19 +93,79 @@
     for r in results:
         yield r
 
-def addremove(repo, pats=[], opts={}, wlock=None, dry_run=None):
+def findrenames(repo, added=None, removed=None, threshold=0.5):
+    '''find likely renames among added and removed files
+
+    Compares the repo.wread() data of each added file with the
+    stored contents of each removed file, looked up through the
+    manifest of the first dirstate parent.  For each added file,
+    yields a (removed, added, score) tuple for its best-scoring
+    removed file if that score is at least threshold.  If added or
+    removed is None, both are taken from repo.status().'''
+    if added is None or removed is None:
+        added, removed = repo.status()[1:3]
+    changes = repo.changelog.read(repo.dirstate.parents()[0])
+    mf = repo.manifest.read(changes[0])
+    for a in added:
+        aa = repo.wread(a)
+        bestscore, bestname = None, None
+        for r in removed:
+            if r not in mf:
+                # not in the parent manifest (e.g. added and then
+                # deleted without a commit): nothing to compare
+                continue
+            rr = repo.file(r).read(mf[r])
+            delta = mdiff.textdiff(aa, rr)
+            # score is 1 - len(delta)/len(aa); a delta at least as
+            # long as aa is treated as no match
+            if len(delta) < len(aa):
+                myscore = 1.0 - (float(len(delta)) / len(aa))
+                if bestscore is None or myscore > bestscore:
+                    bestscore, bestname = myscore, r
+        if bestname and bestscore >= threshold:
+            yield bestname, a, bestscore
+
+def addremove(repo, pats=[], opts={}, wlock=None, dry_run=None,
+              similarity=None):
+    '''add unknown files and remove missing files
+
+    Walked files with src 'f' and dirstate state '?' are added;
+    files not in state 'r' whose path no longer exists are
+    removed.  If dry_run is None, opts['dry_run'] is used; if
+    similarity is None, opts['similarity'] is used.  With
+    similarity > 0, each match yielded by findrenames is recorded
+    as a copy from the removed file to the added one.  With
+    dry_run set, messages are printed but the repo calls are
+    skipped.'''
     if dry_run is None:
         dry_run = opts.get('dry_run')
+    if similarity is None:
+        # NOTE(review): used as-is as the findrenames threshold, while
+        # commands.addremove divides the option by 100 -- confirm units
+        similarity = float(opts.get('similarity') or 0)
     add, remove = [], []
+    mapping = {}
     for src, abs, rel, exact in walk(repo, pats, opts):
         if src == 'f' and repo.dirstate.state(abs) == '?':
             add.append(abs)
+            mapping[abs] = rel, exact
             if repo.ui.verbose or not exact:
                 repo.ui.status(_('adding %s\n') % ((pats and rel) or abs))
         if repo.dirstate.state(abs) != 'r' and not os.path.exists(rel):
             remove.append(abs)
+            mapping[abs] = rel, exact
             if repo.ui.verbose or not exact:
                 repo.ui.status(_('removing %s\n') % ((pats and rel) or abs))
     if not dry_run:
         repo.add(add, wlock=wlock)
         repo.remove(remove, wlock=wlock)
+    if similarity > 0:
+        for old, new, score in findrenames(repo, add, remove, similarity):
+            oldrel, oldexact = mapping[old]
+            newrel, newexact = mapping[new]
+            if repo.ui.verbose or not oldexact or not newexact:
+                repo.ui.status(_('recording removal of %s as rename to %s '
+                                 '(%d%% similar)\n') %
+                               (oldrel, newrel, score * 100))
+            if not dry_run:
+                repo.copy(old, new, wlock=wlock)
--- a/mercurial/commands.py	Fri Aug 18 21:18:01 2006 -0700
+++ b/mercurial/commands.py	Fri Aug 18 22:13:58 2006 -0700
@@ -658,8 +658,21 @@
 
     New files are ignored if they match any of the patterns in .hgignore. As
     with add, these changes take effect at the next commit.
+
+    Use the -s option to detect renamed files.  With a parameter > 0,
+    this compares every removed file with every added file and records
+    those similar enough as renames.  This option takes a percentage
+    between 0 (disabled) and 100 (files must be identical) as its
+    parameter.  Detecting renamed files this way can be expensive.
     """
-    return cmdutil.addremove(repo, pats, opts)
+    try:
+        sim = float(opts.get('similarity') or 0)
+    except ValueError:
+        raise util.Abort(_('similarity must be a number'))
+    if sim < 0 or sim > 100:
+        raise util.Abort(_('similarity must be between 0 and 100'))
+    # the option is a percentage; cmdutil.addremove takes a 0..1 fraction
+    return cmdutil.addremove(repo, pats, opts, similarity=sim/100.)
 
 def annotate(ui, repo, *pats, **opts):
     """show changeset information per file line
@@ -2747,7 +2760,10 @@
         (addremove,
          [('I', 'include', [], _('include names matching the given patterns')),
           ('X', 'exclude', [], _('exclude names matching the given patterns')),
-          ('n', 'dry-run', None, _('do not perform actions, just print output'))],
+          ('n', 'dry-run', None,
+           _('do not perform actions, just print output')),
+          ('s', 'similarity', '',
+           _('guess renamed files by similarity (0<=s<=100)'))],
          _('hg addremove [OPTION]... [FILE]...')),
     "^annotate":
         (annotate,
--- a/tests/test-addremove	Fri Aug 18 21:18:01 2006 -0700
+++ b/tests/test-addremove	Fri Aug 18 22:13:58 2006 -0700
@@ -10,3 +10,19 @@
 touch ../foo_2 bar_2
 hg -v addremove
 hg -v commit -m "add 2" -d "1000000 0"
+
+# rename detection: a is moved to b unchanged, c is deleted and d is new
+cd ..
+hg init sim
+cd sim
+echo a > a
+echo a >> a
+echo a >> a
+echo c > c
+hg commit -Ama
+mv a b
+rm c
+echo d > d
+# -s takes a percentage from 0 to 100
+hg addremove -s 50
+hg commit -mb
--- a/tests/test-addremove.out	Fri Aug 18 21:18:01 2006 -0700
+++ b/tests/test-addremove.out	Fri Aug 18 22:13:58 2006 -0700
@@ -6,3 +6,10 @@
 adding foo_2
 dir/bar_2
 foo_2
+adding a
+adding c
+adding b
+adding d
+removing a
+removing c
+recording removal of a as rename to b (100% similar)