cvsps: recognize and eliminate CVS' synthetic "file added" revisions. 1.2.1
author Greg Ward <greg-hg@gerg.ca>
Wed, 18 Mar 2009 09:15:38 -0400
changeset 7862 02981000012e
parent 7861 2bc14da14992
child 7863 34eaa67fc7ea
cvsps: recognize and eliminate CVS' synthetic "file added" revisions.
hgext/convert/cvsps.py
tests/test-convert-cvs-synthetic
tests/test-convert-cvs-synthetic.out
--- a/hgext/convert/cvsps.py	Mon Mar 09 21:00:37 2009 -0500
+++ b/hgext/convert/cvsps.py	Wed Mar 18 09:15:38 2009 -0400
@@ -33,6 +33,7 @@
         .rcs       - name of file as returned from CVS
         .revision  - revision number as tuple
         .tags      - list of tags on the file
+        .synthetic - is this a synthetic "file ... added on ..." revision?
     '''
     def __init__(self, **entries):
         self.__dict__.update(entries)
@@ -107,6 +108,8 @@
     re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?')
     re_70 = re.compile('branches: (.+);$')
 
+    file_added_re = re.compile(r'file [^/]+ was (initially )?added on branch')
+
     prefix = ''   # leading path to strip of what we get from CVS
 
     if directory is None:
@@ -279,7 +282,8 @@
             assert match, _('expected revision number')
             e = logentry(rcs=scache(rcs), file=scache(filename),
                     revision=tuple([int(x) for x in match.group(1).split('.')]),
-                    branches=[], parent=None)
+                    branches=[], parent=None,
+                    synthetic=False)
             state = 6
 
         elif state == 6:
@@ -338,6 +342,22 @@
             else:
                 e.comment.append(line)
 
+        # When a file is added on a branch B1, CVS creates a synthetic
+        # dead trunk revision 1.1 so that the branch has a root.
+        # Likewise, if you merge such a file to a later branch B2 (one
+        # that already existed when the file was added on B1), CVS
+        # creates a synthetic dead revision 1.1.x.1 on B2.  Don't drop
+        # these revisions now, but mark them synthetic so
+        # createchangeset() can take care of them.
+        if (store and
+              e.dead and
+              e.revision[-1] == 1 and      # 1.1 or 1.1.x.1
+              len(e.comment) == 1 and
+              file_added_re.match(e.comment[0])):
+            ui.debug(_('found synthetic rev in %s: %r\n')
+                     % (e.rcs, e.comment[0]))
+            e.synthetic = True
+
         if store:
             # clean up the results and save in the log.
             store = False
@@ -399,6 +419,7 @@
         .entries   - list of logentry objects in this changeset
         .parents   - list of one or two parent changesets
         .tags      - list of tags on this changeset
+        .synthetic - from synthetic revision "file ... added on branch ..."
     '''
     def __init__(self, **entries):
         self.__dict__.update(entries)
@@ -438,6 +459,19 @@
         files[e.file] = True
         c.date = e.date       # changeset date is date of latest commit in it
 
+    # Mark synthetic changesets
+
+    for c in changesets:
+        # Synthetic revisions always get their own changeset, because
+        # the log message includes the filename.  E.g. if you add file3
+        # and file4 on a branch, you get four log entries and three
+        # changesets:
+        #   "File file3 was added on branch ..." (synthetic, 1 entry)
+        #   "File file4 was added on branch ..." (synthetic, 1 entry)
+        #   "Add file3 and file4 to fix ..."     (real, 2 entries)
+        # Hence the check for 1 entry here.
+        c.synthetic = (len(c.entries) == 1 and c.entries[0].synthetic)
+
     # Sort files in each changeset
 
     for c in changesets:
@@ -546,7 +580,20 @@
 
         c.parents = []
         if p is not None:
-            c.parents.append(changesets[p])
+            p = changesets[p]
+
+            # Ensure no changeset has a synthetic changeset as a parent.
+            while p.synthetic:
+                assert len(p.parents) <= 1, \
+                       _('synthetic changeset cannot have multiple parents')
+                if p.parents:
+                    p = p.parents[0]
+                else:
+                    p = None
+                    break
+
+            if p is not None:
+                c.parents.append(p)
 
         if mergefrom:
             m = mergefrom.search(c.comment)
@@ -582,6 +629,15 @@
         branches[c.branch] = i
         i += 1
 
+    # Drop synthetic changesets (safe now that we have ensured no other
+    # changesets can have them as parents).
+    i = 0
+    while i < len(changesets):
+        if changesets[i].synthetic:
+            del changesets[i]
+        else:
+            i += 1
+
     # Number changesets
 
     for i, c in enumerate(changesets):
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-cvs-synthetic	Wed Mar 18 09:15:38 2009 -0400
@@ -0,0 +1,65 @@
+#!/bin/sh
+
+# This feature requires use of builtin cvsps!
+"$TESTDIR/hghave" cvs || exit 80
+
+# XXX lots of duplication with other test-convert-cvs* scripts
+
+set -e
+
+echo "[extensions]" >> $HGRCPATH
+echo "convert = " >> $HGRCPATH
+echo "[convert]" >> $HGRCPATH
+echo "cvsps=builtin" >> $HGRCPATH
+
+echo % create cvs repository with one project
+mkdir cvsrepo
+cd cvsrepo
+export CVSROOT=`pwd`
+export CVS_OPTIONS=-f
+cd ..
+
+filter='sed "s:$CVSROOT:*REPO*:g"'
+cvscall()
+{
+    cvs -f "$@" | eval $filter
+}
+
+cvscall -q -d "$CVSROOT" init
+mkdir cvsrepo/proj
+
+cvscall co proj
+
+echo % create file1 on the trunk
+cd proj
+touch file1
+cvscall add file1
+cvscall ci -m"add file1 on trunk" file1
+
+echo % create two branches
+cvscall tag -b v1_0
+cvscall tag -b v1_1
+
+echo % create file2 on branch v1_0
+cvs up -rv1_0
+touch file2
+cvscall add file2
+cvscall ci -m"add file2 on branch v1_0" file2
+
+echo % create file3, file4 on branch v1_1
+cvs up -rv1_1
+touch file3
+touch file4
+cvscall add file3 file4
+cvscall ci -m"add file3, file4 on branch v1_1" file3 file4
+
+echo % merge file2 from v1_0 to v1_1
+cvscall up -jv1_0
+cvscall ci -m"merge file2 from v1_0 to v1_1"
+
+echo % convert to hg
+cd ..
+hg convert proj proj.hg | eval $filter
+
+echo % hg log output
+hg -R proj.hg log --template "{rev} {desc}\n"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-cvs-synthetic.out	Wed Mar 18 09:15:38 2009 -0400
@@ -0,0 +1,72 @@
+% create cvs repository with one project
+cvs checkout: Updating proj
+% create file1 on the trunk
+cvs add: scheduling file `file1' for addition
+cvs add: use 'cvs commit' to add this file permanently
+RCS file: *REPO*/proj/file1,v
+done
+Checking in file1;
+*REPO*/proj/file1,v  <--  file1
+initial revision: 1.1
+done
+% create two branches
+cvs tag: Tagging .
+T file1
+cvs tag: Tagging .
+T file1
+% create file2 on branch v1_0
+cvs update: Updating .
+cvs add: scheduling file `file2' for addition on branch `v1_0'
+cvs add: use 'cvs commit' to add this file permanently
+RCS file: *REPO*/proj/Attic/file2,v
+done
+Checking in file2;
+*REPO*/proj/Attic/file2,v  <--  file2
+new revision: 1.1.2.1; previous revision: 1.1
+done
+% create file3, file4 on branch v1_1
+cvs update: Updating .
+cvs update: file2 is no longer in the repository
+cvs add: scheduling file `file3' for addition on branch `v1_1'
+cvs add: scheduling file `file4' for addition on branch `v1_1'
+cvs add: use 'cvs commit' to add these files permanently
+RCS file: *REPO*/proj/Attic/file3,v
+done
+Checking in file3;
+*REPO*/proj/Attic/file3,v  <--  file3
+new revision: 1.1.2.1; previous revision: 1.1
+done
+RCS file: *REPO*/proj/Attic/file4,v
+done
+Checking in file4;
+*REPO*/proj/Attic/file4,v  <--  file4
+new revision: 1.1.2.1; previous revision: 1.1
+done
+% merge file2 from v1_0 to v1_1
+cvs update: Updating .
+U file2
+cvs commit: Examining .
+Checking in file2;
+*REPO*/proj/Attic/file2,v  <--  file2
+new revision: 1.1.4.2; previous revision: 1.1.4.1
+done
+% convert to hg
+initializing destination proj.hg repository
+using builtin cvsps
+collecting CVS rlog
+9 log entries
+creating changesets
+4 changeset entries
+connecting to *REPO*
+scanning source...
+sorting...
+converting...
+3 add file1 on trunk
+2 add file2 on branch v1_0
+1 add file3, file4 on branch v1_1
+0 merge file2 from v1_0 to v1_1
+% hg log output
+3 merge file2 from v1_0 to v1_1
+2 add file3, file4 on branch v1_1
+1 add file2 on branch v1_0
+0 add file1 on trunk