# HG changeset patch # User Greg Ward # Date 1237382138 14400 # Node ID 02981000012e3adf40c4849bd7b3d5618f9ce82d # Parent 2bc14da14992141f586b3a8f4dfe4253737cb1b0 cvsps: recognize and eliminate CVS' synthetic "file added" revisions. diff -r 2bc14da14992 -r 02981000012e hgext/convert/cvsps.py --- a/hgext/convert/cvsps.py Mon Mar 09 21:00:37 2009 -0500 +++ b/hgext/convert/cvsps.py Wed Mar 18 09:15:38 2009 -0400 @@ -33,6 +33,7 @@ .rcs - name of file as returned from CVS .revision - revision number as tuple .tags - list of tags on the file + .synthetic - is this a synthetic "file ... added on ..." revision? ''' def __init__(self, **entries): self.__dict__.update(entries) @@ -107,6 +108,8 @@ re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?') re_70 = re.compile('branches: (.+);$') + file_added_re = re.compile(r'file [^/]+ was (initially )?added on branch') + prefix = '' # leading path to strip of what we get from CVS if directory is None: @@ -279,7 +282,8 @@ assert match, _('expected revision number') e = logentry(rcs=scache(rcs), file=scache(filename), revision=tuple([int(x) for x in match.group(1).split('.')]), - branches=[], parent=None) + branches=[], parent=None, + synthetic=False) state = 6 elif state == 6: @@ -338,6 +342,22 @@ else: e.comment.append(line) + # When a file is added on a branch B1, CVS creates a synthetic + # dead trunk revision 1.1 so that the branch has a root. + # Likewise, if you merge such a file to a later branch B2 (one + # that already existed when the file was added on B1), CVS + # creates a synthetic dead revision 1.1.x.1 on B2. Don't drop + # these revisions now, but mark them synthetic so + # createchangeset() can take care of them. + if (store and + e.dead and + e.revision[-1] == 1 and # 1.1 or 1.1.x.1 + len(e.comment) == 1 and + file_added_re.match(e.comment[0])): + ui.debug(_('found synthetic rev in %s: %r\n') + % (e.rcs, e.comment[0])) + e.synthetic = True + if store: # clean up the results and save in the log. store = False @@ -399,6 +419,7 @@ .entries - list of logentry objects in this changeset .parents - list of one or two parent changesets .tags - list of tags on this changeset + .synthetic - from synthetic revision "file ... added on branch ..." ''' def __init__(self, **entries): self.__dict__.update(entries) @@ -438,6 +459,19 @@ files[e.file] = True c.date = e.date # changeset date is date of latest commit in it + # Mark synthetic changesets + + for c in changesets: + # Synthetic revisions always get their own changeset, because + # the log message includes the filename. E.g. if you add file3 + # and file4 on a branch, you get four log entries and three + # changesets: + # "File file3 was added on branch ..." (synthetic, 1 entry) + # "File file4 was added on branch ..." (synthetic, 1 entry) + # "Add file3 and file4 to fix ..." (real, 2 entries) + # Hence the check for 1 entry here. + c.synthetic = (len(c.entries) == 1 and c.entries[0].synthetic) + # Sort files in each changeset for c in changesets: @@ -546,7 +580,20 @@ c.parents = [] if p is not None: - c.parents.append(changesets[p]) + p = changesets[p] + + # Ensure no changeset has a synthetic changeset as a parent. + while p.synthetic: + assert len(p.parents) <= 1, \ + _('synthetic changeset cannot have multiple parents') + if p.parents: + p = p.parents[0] + else: + p = None + break + + if p is not None: + c.parents.append(p) if mergefrom: m = mergefrom.search(c.comment) @@ -582,6 +629,15 @@ branches[c.branch] = i i += 1 + # Drop synthetic changesets (safe now that we have ensured no other + # changesets can have them as parents). + i = 0 + while i < len(changesets): + if changesets[i].synthetic: + del changesets[i] + else: + i += 1 + # Number changesets for i, c in enumerate(changesets): diff -r 2bc14da14992 -r 02981000012e tests/test-convert-cvs-synthetic --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test-convert-cvs-synthetic Wed Mar 18 09:15:38 2009 -0400 @@ -0,0 +1,65 @@ +#!/bin/sh + +# This feature requires use of builtin cvsps! +"$TESTDIR/hghave" cvs || exit 80 + +# XXX lots of duplication with other test-convert-cvs* scripts + +set -e + +echo "[extensions]" >> $HGRCPATH +echo "convert = " >> $HGRCPATH +echo "[convert]" >> $HGRCPATH +echo "cvsps=builtin" >> $HGRCPATH + +echo % create cvs repository with one project +mkdir cvsrepo +cd cvsrepo +export CVSROOT=`pwd` +export CVS_OPTIONS=-f +cd .. + +filter='sed "s:$CVSROOT:*REPO*:g"' +cvscall() +{ + cvs -f "$@" | eval $filter +} + +cvscall -q -d "$CVSROOT" init +mkdir cvsrepo/proj + +cvscall co proj + +echo % create file1 on the trunk +cd proj +touch file1 +cvscall add file1 +cvscall ci -m"add file1 on trunk" file1 + +echo % create two branches +cvscall tag -b v1_0 +cvscall tag -b v1_1 + +echo % create file2 on branch v1_0 +cvs up -rv1_0 +touch file2 +cvscall add file2 +cvscall ci -m"add file2 on branch v1_0" file2 + +echo % create file3, file4 on branch v1_1 +cvs up -rv1_1 +touch file3 +touch file4 +cvscall add file3 file4 +cvscall ci -m"add file3, file4 on branch v1_1" file3 file4 + +echo % merge file2 from v1_0 to v1_1 +cvscall up -jv1_0 +cvscall ci -m"merge file2 from v1_0 to v1_1" + +echo % convert to hg +cd .. +hg convert proj proj.hg | eval $filter + +echo % hg log output +hg -R proj.hg log --template "{rev} {desc}\n" diff -r 2bc14da14992 -r 02981000012e tests/test-convert-cvs-synthetic.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test-convert-cvs-synthetic.out Wed Mar 18 09:15:38 2009 -0400 @@ -0,0 +1,72 @@ +% create cvs repository with one project +cvs checkout: Updating proj +% create file1 on the trunk +cvs add: scheduling file `file1' for addition +cvs add: use 'cvs commit' to add this file permanently +RCS file: *REPO*/proj/file1,v +done +Checking in file1; +*REPO*/proj/file1,v <-- file1 +initial revision: 1.1 +done +% create two branches +cvs tag: Tagging . +T file1 +cvs tag: Tagging . +T file1 +% create file2 on branch v1_0 +cvs update: Updating . +cvs add: scheduling file `file2' for addition on branch `v1_0' +cvs add: use 'cvs commit' to add this file permanently +RCS file: *REPO*/proj/Attic/file2,v +done +Checking in file2; +*REPO*/proj/Attic/file2,v <-- file2 +new revision: 1.1.2.1; previous revision: 1.1 +done +% create file3, file4 on branch v1_1 +cvs update: Updating . +cvs update: file2 is no longer in the repository +cvs add: scheduling file `file3' for addition on branch `v1_1' +cvs add: scheduling file `file4' for addition on branch `v1_1' +cvs add: use 'cvs commit' to add these files permanently +RCS file: *REPO*/proj/Attic/file3,v +done +Checking in file3; +*REPO*/proj/Attic/file3,v <-- file3 +new revision: 1.1.2.1; previous revision: 1.1 +done +RCS file: *REPO*/proj/Attic/file4,v +done +Checking in file4; +*REPO*/proj/Attic/file4,v <-- file4 +new revision: 1.1.2.1; previous revision: 1.1 +done +% merge file2 from v1_0 to v1_1 +cvs update: Updating . +U file2 +cvs commit: Examining . +Checking in file2; +*REPO*/proj/Attic/file2,v <-- file2 +new revision: 1.1.4.2; previous revision: 1.1.4.1 +done +% convert to hg +initializing destination proj.hg repository +using builtin cvsps +collecting CVS rlog +9 log entries +creating changesets +4 changeset entries +connecting to *REPO* +scanning source... +sorting... +converting... +3 add file1 on trunk +2 add file2 on branch v1_0 +1 add file3, file4 on branch v1_1 +0 merge file2 from v1_0 to v1_1 +% hg log output +3 merge file2 from v1_0 to v1_1 +2 add file3, file4 on branch v1_1 +1 add file2 on branch v1_0 +0 add file1 on trunk