patch: support diff data loss detection and upgrade
author Patrick Mezard <pmezard@gmail.com>
Fri, 01 Jan 2010 20:54:05 +0100
changeset 10189 e451e599fbcf
parent 10188 fd6e9c7cd98c
child 10190 9c2c94934f0d
patch: support diff data loss detection and upgrade

In the worst case, generating a diff in upgrade mode can be twice as expensive as generating it in git mode directly: we may have to regenerate the whole diff whenever a git feature is detected. Also, the first diff attempt is completely buffered instead of being streamed. That said, even without having profiled it yet, I am convinced we can fast-path the upgrade mode if necessary, were it to be used in regular diff commands and not only in mq, where avoiding data loss is worth the price.
mercurial/mdiff.py
mercurial/patch.py
tests/autodiff.py
tests/test-diff-upgrade
tests/test-diff-upgrade.out
--- a/mercurial/mdiff.py	Fri Jan 01 20:52:05 2010 +0100
+++ b/mercurial/mdiff.py	Fri Jan 01 20:54:05 2010 +0100
@@ -27,7 +27,9 @@
     nodates removes dates from diff headers
     ignorews ignores all whitespace changes in the diff
     ignorewsamount ignores changes in the amount of whitespace
-    ignoreblanklines ignores changes whose lines are all blank'''
+    ignoreblanklines ignores changes whose lines are all blank
+    upgrade generates git diffs to avoid data loss
+    '''
 
     defaults = {
         'context': 3,
@@ -38,6 +40,7 @@
         'ignorews': False,
         'ignorewsamount': False,
         'ignoreblanklines': False,
+        'upgrade': False,
         }
 
     __slots__ = defaults.keys()
--- a/mercurial/patch.py	Fri Jan 01 20:52:05 2010 +0100
+++ b/mercurial/patch.py	Fri Jan 01 20:54:05 2010 +0100
@@ -1246,17 +1246,25 @@
     ret.append('\n')
     return ''.join(ret)
 
-def _addmodehdr(header, omode, nmode):
-    if omode != nmode:
-        header.append('old mode %s\n' % omode)
-        header.append('new mode %s\n' % nmode)
+class GitDiffRequired(Exception):
+    pass
 
-def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None):
+def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None,
+         losedatafn=None):
     '''yields diff of changes to files between two nodes, or node and
     working directory.
 
     if node1 is None, use first dirstate parent instead.
-    if node2 is None, compare node1 with working directory.'''
+    if node2 is None, compare node1 with working directory.
+
+    losedatafn(**kwarg) is a callable run when opts.upgrade=True and
+    every time some change cannot be represented with the current
+    patch format. Return False to upgrade to git patch format, True to
+    accept the loss or raise an exception to abort the diff. It is
+    called with the name of current file being diffed as 'fn'. If set
+    to None, patches will always be upgraded to git format when
+    necessary.
+    '''
 
     if opts is None:
         opts = mdiff.defaultopts
@@ -1288,24 +1296,50 @@
     modified, added, removed = changes[:3]
 
     if not modified and not added and not removed:
-        return
+        return []
+
+    revs = None
+    if not repo.ui.quiet:
+        hexfunc = repo.ui.debugflag and hex or short
+        revs = [hexfunc(node) for node in [node1, node2] if node]
+
+    copy = {}
+    if opts.git or opts.upgrade:
+        copy = copies.copies(repo, ctx1, ctx2, repo[nullid])[0]
+        copy = copy.copy()
+        for k, v in copy.items():
+            copy[v] = k
+
+    difffn = lambda opts, losedata: trydiff(repo, revs, ctx1, ctx2,
+                 modified, added, removed, copy, getfilectx, opts, losedata)
+    if opts.upgrade and not opts.git:
+        try:
+            def losedata(fn):
+                if not losedatafn or not losedatafn(fn=fn):
+                    raise GitDiffRequired()
+            # Buffer the whole output until we are sure it can be generated
+            return list(difffn(opts.copy(git=False), losedata))
+        except GitDiffRequired:
+            return difffn(opts.copy(git=True), None)
+    else:
+        return difffn(opts, None)
+
+def _addmodehdr(header, omode, nmode):
+    if omode != nmode:
+        header.append('old mode %s\n' % omode)
+        header.append('new mode %s\n' % nmode)
+
+def trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
+            copy, getfilectx, opts, losedatafn):
 
     date1 = util.datestr(ctx1.date())
     man1 = ctx1.manifest()
 
-    revs = None
-    if not repo.ui.quiet and not opts.git:
-        hexfunc = repo.ui.debugflag and hex or short
-        revs = [hexfunc(node) for node in [node1, node2] if node]
+    gone = set()
+    gitmode = {'l': '120000', 'x': '100755', '': '100644'}
 
     if opts.git:
-        copy, diverge = copies.copies(repo, ctx1, ctx2, repo[nullid])
-        copy = copy.copy()
-        for k, v in copy.items():
-            copy[v] = k
-
-    gone = set()
-    gitmode = {'l': '120000', 'x': '100755', '': '100644'}
+        revs = None
 
     for f in sorted(modified + added + removed):
         to = None
@@ -1317,39 +1351,61 @@
         if f not in removed:
             tn = getfilectx(f, ctx2).data()
         a, b = f, f
-        if opts.git:
+        if opts.git or losedatafn:
             if f in added:
                 mode = gitmode[ctx2.flags(f)]
                 if f in copy:
-                    a = copy[f]
-                    omode = gitmode[man1.flags(a)]
-                    _addmodehdr(header, omode, mode)
-                    if a in removed and a not in gone:
-                        op = 'rename'
-                        gone.add(a)
+                    if opts.git:
+                        a = copy[f]
+                        omode = gitmode[man1.flags(a)]
+                        _addmodehdr(header, omode, mode)
+                        if a in removed and a not in gone:
+                            op = 'rename'
+                            gone.add(a)
+                        else:
+                            op = 'copy'
+                        header.append('%s from %s\n' % (op, a))
+                        header.append('%s to %s\n' % (op, f))
+                        to = getfilectx(a, ctx1).data()
                     else:
-                        op = 'copy'
-                    header.append('%s from %s\n' % (op, a))
-                    header.append('%s to %s\n' % (op, f))
-                    to = getfilectx(a, ctx1).data()
+                        losedatafn(f)
                 else:
-                    header.append('new file mode %s\n' % mode)
+                    if opts.git:
+                        header.append('new file mode %s\n' % mode)
+                    elif ctx2.flags(f):
+                        losedatafn(f)
                 if util.binary(tn):
-                    dodiff = 'binary'
+                    if opts.git:
+                        dodiff = 'binary'
+                    else:
+                        losedatafn(f)
+                if not opts.git and not tn:
+                    # regular diffs cannot represent new empty file
+                    losedatafn(f)
             elif f in removed:
-                # have we already reported a copy above?
-                if f in copy and copy[f] in added and copy[copy[f]] == f:
-                    dodiff = False
-                else:
-                    header.append('deleted file mode %s\n' %
-                                  gitmode[man1.flags(f)])
+                if opts.git:
+                    # have we already reported a copy above?
+                    if f in copy and copy[f] in added and copy[copy[f]] == f:
+                        dodiff = False
+                    else:
+                        header.append('deleted file mode %s\n' %
+                                      gitmode[man1.flags(f)])
+                elif not to:
+                    # regular diffs cannot represent empty file deletion
+                    losedatafn(f)
             else:
-                omode = gitmode[man1.flags(f)]
-                nmode = gitmode[ctx2.flags(f)]
-                _addmodehdr(header, omode, nmode)
-                if util.binary(to) or util.binary(tn):
-                    dodiff = 'binary'
-            header.insert(0, mdiff.diffline(revs, a, b, opts))
+                oflag = man1.flags(f)
+                nflag = ctx2.flags(f)
+                binary = util.binary(to) or util.binary(tn)
+                if opts.git:
+                    _addmodehdr(header, gitmode[oflag], gitmode[nflag])
+                    if binary:
+                        dodiff = 'binary'
+                elif binary or nflag != oflag:
+                    losedatafn(f)
+            if opts.git:
+                header.insert(0, mdiff.diffline(revs, a, b, opts))
+
         if dodiff:
             if dodiff == 'binary':
                 text = b85diff(to, tn)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/autodiff.py	Fri Jan 01 20:54:05 2010 +0100
@@ -0,0 +1,46 @@
+# Extension dedicated to test patch.diff() upgrade modes
+#
+#
+from mercurial import cmdutil, patch, util
+
+def autodiff(ui, repo, *pats, **opts):
+    diffopts = patch.diffopts(ui, opts)
+    git = opts.get('git', 'no')
+    brokenfiles = set()
+    losedatafn = None
+    if git in ('yes', 'no'):
+        diffopts.git = git == 'yes'
+        diffopts.upgrade = False
+    elif git == 'auto':
+        diffopts.git = False
+        diffopts.upgrade = True
+    elif git == 'warn':
+        diffopts.git = False
+        diffopts.upgrade = True
+        def losedatafn(fn=None, **kwargs):
+            brokenfiles.add(fn)
+            return True
+    elif git == 'abort':
+        diffopts.git = False
+        diffopts.upgrade = True
+        def losedatafn(fn=None, **kwargs):
+            raise util.Abort('losing data for %s' % fn)
+    else:
+        raise util.Abort('--git must be yes, no or auto')
+
+    node1, node2 = cmdutil.revpair(repo, [])
+    m = cmdutil.match(repo, pats, opts)
+    it = patch.diff(repo, node1, node2, match=m, opts=diffopts,
+                    losedatafn=losedatafn)
+    for chunk in it:
+        ui.write(chunk)
+    for fn in sorted(brokenfiles):
+        ui.write('data lost for: %s\n' % fn)
+
+cmdtable = {
+    "autodiff":
+        (autodiff,
+         [('', 'git', '', 'git upgrade mode (yes/no/auto/warn/abort)'),
+          ],
+         '[OPTION]... [FILE]...'),
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-diff-upgrade	Fri Jan 01 20:54:05 2010 +0100
@@ -0,0 +1,63 @@
+#!/bin/sh
+
+echo "[extensions]" >> $HGRCPATH
+echo "autodiff=$TESTDIR/autodiff.py" >> $HGRCPATH
+echo "[diff]" >> $HGRCPATH
+echo "nodates=1" >> $HGRCPATH
+
+hg init repo
+cd repo
+echo '% make a combination of new, changed and deleted file'
+echo regular > regular
+echo rmregular > rmregular
+touch rmempty
+echo exec > exec
+chmod +x exec
+echo rmexec > rmexec
+chmod +x rmexec
+echo setexec > setexec
+echo unsetexec > unsetexec
+chmod +x unsetexec
+echo binary > binary
+python -c "file('rmbinary', 'wb').write('\0')"
+hg ci -Am addfiles
+echo regular >> regular
+echo newregular >> newregular
+rm rmempty
+touch newempty
+rm rmregular
+echo exec >> exec
+echo newexec > newexec
+chmod +x newexec
+rm rmexec
+chmod +x setexec
+chmod -x unsetexec
+python -c "file('binary', 'wb').write('\0\0')"
+python -c "file('newbinary', 'wb').write('\0')"
+rm rmbinary
+hg addremove
+
+echo '% git=no: regular diff for all files'
+hg autodiff --git=no
+
+echo '% git=no: git diff for single regular file'
+hg autodiff --git=yes regular
+
+echo '% git=auto: regular diff for regular files and removals'
+hg autodiff --git=auto regular newregular rmregular rmbinary rmexec
+
+for f in exec newexec setexec unsetexec binary newbinary newempty rmempty; do
+    echo '% git=auto: git diff for' $f
+    hg autodiff --git=auto $f
+done
+
+echo '% git=warn: regular diff with data loss warnings'
+hg autodiff --git=warn
+
+echo '% git=abort: fail on execute bit change'
+hg autodiff --git=abort regular setexec
+
+echo '% git=abort: succeed on regular file'
+hg autodiff --git=abort regular
+
+cd ..
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-diff-upgrade.out	Fri Jan 01 20:54:05 2010 +0100
@@ -0,0 +1,186 @@
+% make a combination of new, changed and deleted file
+adding binary
+adding exec
+adding regular
+adding rmbinary
+adding rmempty
+adding rmexec
+adding rmregular
+adding setexec
+adding unsetexec
+adding newbinary
+adding newempty
+adding newexec
+adding newregular
+removing rmbinary
+removing rmempty
+removing rmexec
+removing rmregular
+% git=no: regular diff for all files
+diff -r b3f053cd7c7f binary
+Binary file binary has changed
+diff -r b3f053cd7c7f exec
+--- a/exec
++++ b/exec
+@@ -1,1 +1,2 @@
+ exec
++exec
+diff -r b3f053cd7c7f newbinary
+Binary file newbinary has changed
+diff -r b3f053cd7c7f newexec
+--- /dev/null
++++ b/newexec
+@@ -0,0 +1,1 @@
++newexec
+diff -r b3f053cd7c7f newregular
+--- /dev/null
++++ b/newregular
+@@ -0,0 +1,1 @@
++newregular
+diff -r b3f053cd7c7f regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular
+diff -r b3f053cd7c7f rmbinary
+Binary file rmbinary has changed
+diff -r b3f053cd7c7f rmexec
+--- a/rmexec
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmexec
+diff -r b3f053cd7c7f rmregular
+--- a/rmregular
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmregular
+% git=no: git diff for single regular file
+diff --git a/regular b/regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular
+% git=auto: regular diff for regular files and removals
+diff -r b3f053cd7c7f newregular
+--- /dev/null
++++ b/newregular
+@@ -0,0 +1,1 @@
++newregular
+diff -r b3f053cd7c7f regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular
+diff -r b3f053cd7c7f rmbinary
+Binary file rmbinary has changed
+diff -r b3f053cd7c7f rmexec
+--- a/rmexec
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmexec
+diff -r b3f053cd7c7f rmregular
+--- a/rmregular
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmregular
+% git=auto: git diff for exec
+diff -r b3f053cd7c7f exec
+--- a/exec
++++ b/exec
+@@ -1,1 +1,2 @@
+ exec
++exec
+% git=auto: git diff for newexec
+diff --git a/newexec b/newexec
+new file mode 100755
+--- /dev/null
++++ b/newexec
+@@ -0,0 +1,1 @@
++newexec
+% git=auto: git diff for setexec
+diff --git a/setexec b/setexec
+old mode 100644
+new mode 100755
+% git=auto: git diff for unsetexec
+diff --git a/unsetexec b/unsetexec
+old mode 100755
+new mode 100644
+% git=auto: git diff for binary
+diff --git a/binary b/binary
+index a9128c283485202893f5af379dd9beccb6e79486..09f370e38f498a462e1ca0faa724559b6630c04f
+GIT binary patch
+literal 2
+Jc${Nk0000200961
+
+% git=auto: git diff for newbinary
+diff --git a/newbinary b/newbinary
+new file mode 100644
+index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d
+GIT binary patch
+literal 1
+Ic${MZ000310RR91
+
+% git=auto: git diff for newempty
+diff --git a/newempty b/newempty
+new file mode 100644
+% git=auto: git diff for rmempty
+diff --git a/rmempty b/rmempty
+deleted file mode 100644
+% git=warn: regular diff with data loss warnings
+diff -r b3f053cd7c7f binary
+Binary file binary has changed
+diff -r b3f053cd7c7f exec
+--- a/exec
++++ b/exec
+@@ -1,1 +1,2 @@
+ exec
++exec
+diff -r b3f053cd7c7f newbinary
+Binary file newbinary has changed
+diff -r b3f053cd7c7f newexec
+--- /dev/null
++++ b/newexec
+@@ -0,0 +1,1 @@
++newexec
+diff -r b3f053cd7c7f newregular
+--- /dev/null
++++ b/newregular
+@@ -0,0 +1,1 @@
++newregular
+diff -r b3f053cd7c7f regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular
+diff -r b3f053cd7c7f rmbinary
+Binary file rmbinary has changed
+diff -r b3f053cd7c7f rmexec
+--- a/rmexec
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmexec
+diff -r b3f053cd7c7f rmregular
+--- a/rmregular
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmregular
+data lost for: binary
+data lost for: newbinary
+data lost for: newempty
+data lost for: newexec
+data lost for: rmempty
+data lost for: setexec
+data lost for: unsetexec
+% git=abort: fail on execute bit change
+abort: losing data for setexec
+% git=abort: succeed on regular file
+diff -r b3f053cd7c7f regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular