convert: introduce --full for converting all files
author Mads Kiilerich <madski@unity3d.com>
Tue, 26 Aug 2014 22:03:32 +0200
changeset 22300 35ab037de989
parent 22299 98aafdf4cbf6
child 22304 5678b0e3608f
convert: introduce --full for converting all files Convert will normally only process files that were changed in a source revision, apply the filemap, and record it as a change in the target repository. (If it ends up not really changing anything, nothing changes.) That means that _if_ the filemap is changed before continuing an incremental convert, the change will only kick in when the files it affects are modified in a source revision and thus processed. With --full, convert will make a full conversion every time and process all files in the source repo and remove target repo files that shouldn't be there. Filemap changes will thus kick in on the first converted revision, no matter what is changed. This flag should in most cases not make any difference but will make convert significantly slower. Other names have been considered for this feature, such as "resync", "sync", "checkunmodified", "all" or "allfiles", but I found that they were less obvious and required more explanation than "full" and were harder to describe consistently.
hgext/convert/__init__.py
hgext/convert/bzr.py
hgext/convert/common.py
hgext/convert/convcmd.py
hgext/convert/cvs.py
hgext/convert/darcs.py
hgext/convert/filemap.py
hgext/convert/git.py
hgext/convert/gnuarch.py
hgext/convert/hg.py
hgext/convert/monotone.py
hgext/convert/p4.py
hgext/convert/subversion.py
tests/test-convert-hg-sink.t
tests/test-convert-svn-sink.t
tests/test-convert-svn-source.t
tests/test-convert.t
--- a/hgext/convert/__init__.py	Tue Aug 26 22:03:32 2014 +0200
+++ b/hgext/convert/__init__.py	Tue Aug 26 22:03:32 2014 +0200
@@ -29,6 +29,8 @@
     ('A', 'authormap', '', _('remap usernames using this file'), _('FILE')),
     ('', 'filemap', '', _('remap file names using contents of file'),
      _('FILE')),
+    ('', 'full', None,
+     _('apply filemap changes by converting all files again')),
     ('', 'splicemap', '', _('splice synthesized history into place'),
      _('FILE')),
     ('', 'branchmap', '', _('change branch names while converting'),
@@ -131,6 +133,14 @@
     it is converted. To rename from a subdirectory into the root of
     the repository, use ``.`` as the path to rename to.
 
+    ``--full`` will make sure the converted changesets contain exactly
+    the right files with the right content. It will make a full
+    conversion of all files, not just the ones that have
+    changed. Files that already are correct will not be changed. This
+    can be used to apply filemap changes when converting
+    incrementally. This is currently only supported for Mercurial and
+    Subversion.
+
     The splicemap is a file that allows insertion of synthetic
     history, letting you specify the parents of a revision. This is
     useful if you want to e.g. give a Subversion merge two parents, or
--- a/hgext/convert/bzr.py	Tue Aug 26 22:03:32 2014 +0200
+++ b/hgext/convert/bzr.py	Tue Aug 26 22:03:32 2014 +0200
@@ -134,8 +134,9 @@
             sio = revtree.get_file(fileid)
             return sio.read(), mode
 
-    def getchanges(self, version):
-        # set up caches: modecache and revtree
+    def getchanges(self, version, full):
+        if full:
+            raise util.Abort(_("convert from cvs do not support --full"))
         self._modecache = {}
         self._revtree = self.sourcerepo.revision_tree(version)
         # get the parentids from the cache
--- a/hgext/convert/common.py	Tue Aug 26 22:03:32 2014 +0200
+++ b/hgext/convert/common.py	Tue Aug 26 22:03:32 2014 +0200
@@ -93,12 +93,13 @@
         """
         raise NotImplementedError
 
-    def getchanges(self, version):
+    def getchanges(self, version, full):
         """Returns a tuple of (files, copies).
 
         files is a sorted list of (filename, id) tuples for all files
         changed between version and its first parent returned by
-        getcommit(). id is the source revision id of the file.
+        getcommit(). If full, all files in that revision are returned.
+        id is the source revision id of the file.
 
         copies is a dictionary of dest: source
         """
@@ -204,7 +205,7 @@
         mapping equivalent authors identifiers for each system."""
         return None
 
-    def putcommit(self, files, copies, parents, commit, source, revmap):
+    def putcommit(self, files, copies, parents, commit, source, revmap, full):
         """Create a revision with all changed files listed in 'files'
         and having listed parents. 'commit' is a commit object
         containing at a minimum the author, date, and message for this
@@ -212,7 +213,8 @@
         'copies' is a dictionary mapping destinations to sources,
         'source' is the source repository, and 'revmap' is a mapfile
         of source revisions to converted revisions. Only getfile() and
-        lookuprev() should be called on 'source'.
+        lookuprev() should be called on 'source'. 'full' means that 'files'
+        is complete and all other files should be removed.
 
         Note that the sink repository is not told to update itself to
         a particular revision (or even what that revision would be)
--- a/hgext/convert/convcmd.py	Tue Aug 26 22:03:32 2014 +0200
+++ b/hgext/convert/convcmd.py	Tue Aug 26 22:03:32 2014 +0200
@@ -386,8 +386,8 @@
 
     def copy(self, rev):
         commit = self.commitcache[rev]
-
-        changes = self.source.getchanges(rev)
+        full = self.opts.get('full')
+        changes = self.source.getchanges(rev, full)
         if isinstance(changes, basestring):
             if changes == SKIPREV:
                 dest = SKIPREV
@@ -413,7 +413,7 @@
             parents = [b[0] for b in pbranches]
         source = progresssource(self.ui, self.source, len(files))
         newnode = self.dest.putcommit(files, copies, parents, commit,
-                                      source, self.map)
+                                      source, self.map, full)
         source.close()
         self.source.converted(rev, newnode)
         self.map[rev] = newnode
--- a/hgext/convert/cvs.py	Tue Aug 26 22:03:32 2014 +0200
+++ b/hgext/convert/cvs.py	Tue Aug 26 22:03:32 2014 +0200
@@ -258,7 +258,9 @@
                 else:
                     raise util.Abort(_("unknown CVS response: %s") % line)
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
+        if full:
+            raise util.Abort(_("convert from cvs do not support --full"))
         self._parse()
         return sorted(self.files[rev].iteritems()), {}
 
--- a/hgext/convert/darcs.py	Tue Aug 26 22:03:32 2014 +0200
+++ b/hgext/convert/darcs.py	Tue Aug 26 22:03:32 2014 +0200
@@ -156,7 +156,9 @@
             output, status = self.run('revert', all=True, repodir=self.tmppath)
             self.checkexit(status, output)
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
+        if full:
+            raise util.Abort(_("convert from darcs do not support --full"))
         copies = {}
         changes = []
         man = None
--- a/hgext/convert/filemap.py	Tue Aug 26 22:03:32 2014 +0200
+++ b/hgext/convert/filemap.py	Tue Aug 26 22:03:32 2014 +0200
@@ -304,7 +304,7 @@
         wrev.add(rev)
         self.wantedancestors[rev] = wrev
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
         parents = self.commits[rev].parents
         if len(parents) > 1:
             self.rebuild()
@@ -384,7 +384,7 @@
         # Get the real changes and do the filtering/mapping. To be
         # able to get the files later on in getfile, we hide the
         # original filename in the rev part of the return value.
-        changes, copies = self.base.getchanges(rev)
+        changes, copies = self.base.getchanges(rev, full)
         files = {}
         for f, r in changes:
             newf = self.filemapper(f)
--- a/hgext/convert/git.py	Tue Aug 26 22:03:32 2014 +0200
+++ b/hgext/convert/git.py	Tue Aug 26 22:03:32 2014 +0200
@@ -180,7 +180,9 @@
                 continue
             m.node = node.strip()
 
-    def getchanges(self, version):
+    def getchanges(self, version, full):
+        if full:
+            raise util.Abort(_("convert from git do not support --full"))
         self.modecache = {}
         fh = self.gitopen("git diff-tree -z --root -m -r %s" % version)
         changes = []
--- a/hgext/convert/gnuarch.py	Tue Aug 26 22:03:32 2014 +0200
+++ b/hgext/convert/gnuarch.py	Tue Aug 26 22:03:32 2014 +0200
@@ -142,7 +142,9 @@
 
         return self._getfile(name, rev)
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
+        if full:
+            raise util.Abort(_("convert from arch do not support --full"))
         self._update(rev)
         changes = []
         copies = {}
--- a/hgext/convert/hg.py	Tue Aug 26 22:03:32 2014 +0200
+++ b/hgext/convert/hg.py	Tue Aug 26 22:03:32 2014 +0200
@@ -128,11 +128,13 @@
             fp.write('%s %s\n' % (revid, s[1]))
         return fp.getvalue()
 
-    def putcommit(self, files, copies, parents, commit, source, revmap):
-
+    def putcommit(self, files, copies, parents, commit, source, revmap, full):
         files = dict(files)
         def getfilectx(repo, memctx, f):
-            v = files[f]
+            try:
+                v = files[f]
+            except KeyError:
+                return None
             data, mode = source.getfile(f, v)
             if data is None:
                 return None
@@ -193,7 +195,10 @@
         while parents:
             p1 = p2
             p2 = parents.pop(0)
-            ctx = context.memctx(self.repo, (p1, p2), text, files.keys(),
+            fileset = set(files)
+            if full:
+                fileset.update(self.repo[p1], self.repo[p2])
+            ctx = context.memctx(self.repo, (p1, p2), text, fileset,
                                  getfilectx, commit.author, commit.date, extra)
             self.repo.commitctx(ctx)
             text = "(octopus merge fixup)\n"
@@ -356,17 +361,18 @@
         except error.LookupError:
             return None, None
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
         ctx = self.changectx(rev)
         parents = self.parents(ctx)
-        if not parents:
+        if full or not parents:
             files = copyfiles = ctx.manifest()
-        else:
+        if parents:
             if self._changescache[0] == rev:
                 m, a, r = self._changescache[1]
             else:
                 m, a, r = self.repo.status(parents[0].node(), ctx.node())[:3]
-            files = m + a + r
+            if not full:
+                files = m + a + r
             copyfiles = m + a
         # getcopies() is also run for roots and before filtering so missing
         # revlogs are detected early
--- a/hgext/convert/monotone.py	Tue Aug 26 22:03:32 2014 +0200
+++ b/hgext/convert/monotone.py	Tue Aug 26 22:03:32 2014 +0200
@@ -224,7 +224,9 @@
         else:
             return [self.rev]
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
+        if full:
+            raise util.Abort(_("convert from monotone do not support --full"))
         revision = self.mtnrun("get_revision", rev).split("\n\n")
         files = {}
         ignoremove = {}
--- a/hgext/convert/p4.py	Tue Aug 26 22:03:32 2014 +0200
+++ b/hgext/convert/p4.py	Tue Aug 26 22:03:32 2014 +0200
@@ -192,7 +192,9 @@
 
         return contents, mode
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
+        if full:
+            raise util.Abort(_("convert from p4 do not support --full"))
         return self.files[rev], {}
 
     def getcommit(self, rev):
--- a/hgext/convert/subversion.py	Tue Aug 26 22:03:32 2014 +0200
+++ b/hgext/convert/subversion.py	Tue Aug 26 22:03:32 2014 +0200
@@ -444,37 +444,37 @@
 
         return self.heads
 
-    def _getchanges(self, rev):
+    def _getchanges(self, rev, full):
         (paths, parents) = self.paths[rev]
+        copies = {}
         if parents:
             files, self.removed, copies = self.expandpaths(rev, paths, parents)
-        else:
+        if full or not parents:
             # Perform a full checkout on roots
             uuid, module, revnum = revsplit(rev)
             entries = svn.client.ls(self.baseurl + quote(module),
                                     optrev(revnum), True, self.ctx)
             files = [n for n, e in entries.iteritems()
                      if e.kind == svn.core.svn_node_file]
-            copies = {}
             self.removed = set()
 
         files.sort()
         files = zip(files, [rev] * len(files))
         return (files, copies)
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
         # reuse cache from getchangedfiles
-        if self._changescache[0] == rev:
+        if self._changescache[0] == rev and not full:
             (files, copies) = self._changescache[1]
         else:
-            (files, copies) = self._getchanges(rev)
+            (files, copies) = self._getchanges(rev, full)
             # caller caches the result, so free it here to release memory
             del self.paths[rev]
         return (files, copies)
 
     def getchangedfiles(self, rev, i):
         # called from filemap - cache computed values for reuse in getchanges
-        (files, copies) = self._getchanges(rev)
+        (files, copies) = self._getchanges(rev, False)
         self._changescache = (rev, (files, copies))
         return [f[0] for f in files]
 
@@ -1222,7 +1222,7 @@
     def revid(self, rev):
         return u"svn:%s@%s" % (self.uuid, rev)
 
-    def putcommit(self, files, copies, parents, commit, source, revmap):
+    def putcommit(self, files, copies, parents, commit, source, revmap, full):
         for parent in parents:
             try:
                 return self.revid(self.childmap[parent])
@@ -1238,6 +1238,8 @@
                 self.putfile(f, mode, data)
                 if f in copies:
                     self.copies.append([copies[f], f])
+        if full:
+            self.delete.extend(sorted(self.manifest.difference(files)))
         files = [f[0] for f in files]
 
         entries = set(self.delete)
--- a/tests/test-convert-hg-sink.t	Tue Aug 26 22:03:32 2014 +0200
+++ b/tests/test-convert-hg-sink.t	Tue Aug 26 22:03:32 2014 +0200
@@ -537,3 +537,16 @@
   |
   o  0 0 (a-only f)
   
+Convert with --full adds and removes files that didn't change
+
+  $ echo f >> 0/f
+  $ hg -R 0 ci -m "f"
+  $ hg convert --filemap filemap-b --full 0 a --config convert.hg.revs=1::
+  scanning source...
+  sorting...
+  converting...
+  0 f
+  $ hg -R a status --change tip
+  M f
+  A b-only
+  R a-only
--- a/tests/test-convert-svn-sink.t	Tue Aug 26 22:03:32 2014 +0200
+++ b/tests/test-convert-svn-sink.t	Tue Aug 26 22:03:32 2014 +0200
@@ -247,6 +247,31 @@
 
 #endif
 
+Convert with --full adds and removes files that didn't change
+
+  $ touch a/f
+  $ hg -R a ci -Aqmf
+  $ echo "rename c d" > filemap
+  $ hg convert -d svn a --filemap filemap --full
+  assuming destination a-hg
+  initializing svn working copy 'a-hg-wc'
+  scanning source...
+  sorting...
+  converting...
+  0 f
+  $ svnupanddisplay a-hg-wc 1
+   9 9 test .
+   9 9 test d
+   9 9 test f
+  revision: 9
+  author: test
+  msg: f
+   D /c
+   A /d
+   D /d1
+   A /f
+   D /newlink
+
   $ rm -rf a a-hg a-hg-wc
 
 
--- a/tests/test-convert-svn-source.t	Tue Aug 26 22:03:32 2014 +0200
+++ b/tests/test-convert-svn-source.t	Tue Aug 26 22:03:32 2014 +0200
@@ -168,6 +168,27 @@
   |
   o  0 second letter files: letter2.txt
   
+Convert with --full adds and removes files that didn't change
+
+  $ cd B
+  $ echo >> "letter .txt"
+  $ svn ci -m 'nothing'
+  Sending        letter .txt
+  Transmitting file data .
+  Committed revision 9.
+  $ cd ..
+
+  $ echo 'rename letter2.txt letter3.txt' > filemap
+  $ hg convert --filemap filemap --full "$SVNREPOURL/proj%20B/mytrunk" fmap
+  scanning source...
+  sorting...
+  converting...
+  0 nothing
+  $ hg -R fmap st --change tip
+  A letter .txt
+  A letter3.txt
+  R letter2.txt
+
 test invalid splicemap1
 
   $ cat > splicemap <<EOF
--- a/tests/test-convert.t	Tue Aug 26 22:03:32 2014 +0200
+++ b/tests/test-convert.t	Tue Aug 26 22:03:32 2014 +0200
@@ -91,6 +91,13 @@
       directory if it is converted. To rename from a subdirectory into the root
       of the repository, use "." as the path to rename to.
   
+      "--full" will make sure the converted changesets contain exactly the right
+      files with the right content. It will make a full conversion of all files,
+      not just the ones that have changed. Files that already are correct will
+      not be changed. This can be used to apply filemap changes when converting
+      incrementally. This is currently only supported for Mercurial and
+      Subversion.
+  
       The splicemap is a file that allows insertion of synthetic history,
       letting you specify the parents of a revision. This is useful if you want
       to e.g. give a Subversion merge two parents, or graft two disconnected
@@ -265,6 +272,7 @@
    -r --rev REV          import up to source revision REV
    -A --authormap FILE   remap usernames using this file
       --filemap FILE     remap file names using contents of file
+      --full             apply filemap changes by converting all files again
       --splicemap FILE   splice synthesized history into place
       --branchmap FILE   change branch names while converting
       --branchsort       try to sort changesets by branches