--- a/hgext/convert/cvsps.py Sat Oct 05 10:29:34 2019 -0400
+++ b/hgext/convert/cvsps.py Sun Oct 06 09:45:02 2019 -0400
@@ -26,6 +26,7 @@
pickle = util.pickle
+
class logentry(object):
'''Class logentry has the following attributes:
.author - author name as CVS knows it
@@ -46,17 +47,22 @@
rlog output) or None
.branchpoints - the branches that start at the current entry or empty
'''
+
def __init__(self, **entries):
self.synthetic = False
self.__dict__.update(entries)
def __repr__(self):
- items = (r"%s=%r"%(k, self.__dict__[k]) for k in sorted(self.__dict__))
- return r"%s(%s)"%(type(self).__name__, r", ".join(items))
+ items = (
+ r"%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__)
+ )
+ return r"%s(%s)" % (type(self).__name__, r", ".join(items))
+
class logerror(Exception):
pass
+
def getrepopath(cvspath):
"""Return the repository path from a CVS path.
@@ -93,45 +99,52 @@
if atposition != -1:
start = atposition
- repopath = parts[-1][parts[-1].find('/', start):]
+ repopath = parts[-1][parts[-1].find('/', start) :]
return repopath
+
def createlog(ui, directory=None, root="", rlog=True, cache=None):
'''Collect the CVS rlog'''
# Because we store many duplicate commit log messages, reusing strings
# saves a lot of memory and pickle storage space.
_scache = {}
+
def scache(s):
"return a shared version of a string"
return _scache.setdefault(s, s)
ui.status(_('collecting CVS rlog\n'))
- log = [] # list of logentry objects containing the CVS state
+ log = [] # list of logentry objects containing the CVS state
# patterns to match in CVS (r)log output, by state of use
re_00 = re.compile(b'RCS file: (.+)$')
re_01 = re.compile(b'cvs \\[r?log aborted\\]: (.+)$')
re_02 = re.compile(b'cvs (r?log|server): (.+)\n$')
- re_03 = re.compile(b"(Cannot access.+CVSROOT)|"
- b"(can't create temporary directory.+)$")
+ re_03 = re.compile(
+ b"(Cannot access.+CVSROOT)|" b"(can't create temporary directory.+)$"
+ )
re_10 = re.compile(b'Working file: (.+)$')
re_20 = re.compile(b'symbolic names:')
re_30 = re.compile(b'\t(.+): ([\\d.]+)$')
re_31 = re.compile(b'----------------------------$')
- re_32 = re.compile(b'======================================='
- b'======================================$')
+ re_32 = re.compile(
+ b'======================================='
+ b'======================================$'
+ )
re_50 = re.compile(br'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
- re_60 = re.compile(br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
- br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
- br'(\s+commitid:\s+([^;]+);)?'
- br'(.*mergepoint:\s+([^;]+);)?')
+ re_60 = re.compile(
+ br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
+ br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
+ br'(\s+commitid:\s+([^;]+);)?'
+ br'(.*mergepoint:\s+([^;]+);)?'
+ )
re_70 = re.compile(b'branches: (.+);$')
file_added_re = re.compile(br'file [^/]+ was (initially )?added on branch')
- prefix = '' # leading path to strip of what we get from CVS
+ prefix = ''  # leading path to strip off what we get from CVS
if directory is None:
# Current working directory
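Aside for reviewers: the `_scache`/`scache` helper in this hunk is a plain string-interning table. `dict.setdefault(s, s)` returns the first instance stored for any equal string, so duplicate log messages share one object in memory and in the pickle cache. A minimal standalone sketch of the idiom (strings are illustrative):

    _scache = {}

    def scache(s):
        """Return one shared instance for every string equal to s."""
        return _scache.setdefault(s, s)

    a = scache("same log message")
    b = scache("same " + "log message")  # equal value, distinct object
    assert a == b and a is b             # interned: both names share one object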
@@ -151,7 +164,7 @@
# Use the Root file in the sandbox, if it exists
try:
- root = open(os.path.join('CVS','Root'), 'rb').read().strip()
+ root = open(os.path.join('CVS', 'Root'), 'rb').read().strip()
except IOError:
pass
@@ -178,17 +191,20 @@
# are mapped to different cache file names.
cachefile = root.split(":") + [directory, "cache"]
cachefile = ['-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
- cachefile = os.path.join(cachedir,
- '.'.join([s for s in cachefile if s]))
+ cachefile = os.path.join(
+ cachedir, '.'.join([s for s in cachefile if s])
+ )
if cache == 'update':
try:
ui.note(_('reading cvs log cache %s\n') % cachefile)
oldlog = pickle.load(open(cachefile, 'rb'))
for e in oldlog:
- if not (util.safehasattr(e, 'branchpoints') and
- util.safehasattr(e, 'commitid') and
- util.safehasattr(e, 'mergepoint')):
+ if not (
+ util.safehasattr(e, 'branchpoints')
+ and util.safehasattr(e, 'commitid')
+ and util.safehasattr(e, 'mergepoint')
+ ):
ui.status(_('ignoring old cache\n'))
oldlog = []
break
@@ -198,7 +214,7 @@
ui.note(_('error reading cache: %r\n') % e)
if oldlog:
- date = oldlog[-1].date # last commit date as a (time,tz) tuple
+ date = oldlog[-1].date # last commit date as a (time,tz) tuple
date = dateutil.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')
# build the CVS commandline
@@ -220,11 +236,11 @@
cmd.append(directory)
# state machine begins here
- tags = {} # dictionary of revisions on current file with their tags
- branchmap = {} # mapping between branch names and revision numbers
+ tags = {} # dictionary of revisions on current file with their tags
+ branchmap = {} # mapping between branch names and revision numbers
rcsmap = {}
state = 0
- store = False # set when a new record can be appended
+ store = False # set when a new record can be appended
cmd = [procutil.shellquote(arg) for arg in cmd]
ui.note(_("running %s\n") % (' '.join(cmd)))
@@ -239,7 +255,7 @@
peek = util.fromnativeeol(pfp.readline())
if line.endswith('\n'):
line = line[:-1]
- #ui.debug('state=%d line=%r\n' % (state, line))
+ # ui.debug('state=%d line=%r\n' % (state, line))
if state == 0:
# initial state, consume input until we see 'RCS file'
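The parser keeps a one-line lookahead (`peek`) so later states can tell whether a `----------------------------` separator introduces another revision. A sketch of the same loop shape over a plain text stream; `util.fromnativeeol` is Mercurial's EOL normalizer and is elided here, and the sample input is invented:

    import io

    fp = io.StringIO("revision 1.1\ndate: ...;\n----------------------------\n")
    line = fp.readline()
    while line:
        peek = fp.readline()  # lookahead consulted by the state machine
        if line.endswith("\n"):
            line = line[:-1]  # drop the newline, keep genuinely blank lines
        # ... dispatch on (state, line), checking peek where needed ...
        line = peek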
@@ -250,7 +266,7 @@
if rlog:
filename = util.normpath(rcs[:-2])
if filename.startswith(prefix):
- filename = filename[len(prefix):]
+ filename = filename[len(prefix) :]
if filename.startswith('/'):
filename = filename[1:]
if filename.startswith('Attic/'):
@@ -310,8 +326,9 @@
if re_31.match(line):
state = 5
else:
- assert not re_32.match(line), _('must have at least '
- 'some revisions')
+ assert not re_32.match(line), _(
+ 'must have at least some revisions'
+ )
elif state == 5:
# expecting revision number and possibly (ignored) lock indication
@@ -319,15 +336,16 @@
# as this state is re-entered for subsequent revisions of a file.
match = re_50.match(line)
assert match, _('expected revision number')
- e = logentry(rcs=scache(rcs),
- file=scache(filename),
- revision=tuple([int(x) for x in
- match.group(1).split('.')]),
- branches=[],
- parent=None,
- commitid=None,
- mergepoint=None,
- branchpoints=set())
+ e = logentry(
+ rcs=scache(rcs),
+ file=scache(filename),
+ revision=tuple([int(x) for x in match.group(1).split('.')]),
+ branches=[],
+ parent=None,
+ commitid=None,
+ mergepoint=None,
+ branchpoints=set(),
+ )
state = 6
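`revision=tuple([int(x) for x in match.group(1).split('.')])` stores the dotted CVS revision as a tuple of ints, so revisions sort numerically rather than lexically:

    rev = tuple(int(x) for x in "1.2.4.10".split("."))
    assert rev == (1, 2, 4, 10)
    # As strings "1.2.4.10" sorts before "1.2.4.9"; as int tuples the
    # comparison comes out right:
    assert "1.2.4.10" < "1.2.4.9"
    assert (1, 2, 4, 9) < (1, 2, 4, 10)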
@@ -343,9 +361,10 @@
if len(d.split()) != 3:
# cvs log dates always in GMT
d = d + ' UTC'
- e.date = dateutil.parsedate(d, ['%y/%m/%d %H:%M:%S',
- '%Y/%m/%d %H:%M:%S',
- '%Y-%m-%d %H:%M:%S'])
+ e.date = dateutil.parsedate(
+ d,
+ ['%y/%m/%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S'],
+ )
e.author = scache(match.group(2))
e.dead = match.group(3).lower() == 'dead'
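Three date formats are tried because `cvs log` output changed over the years: old CVS emits two-digit years and slashed dates, newer versions the ISO-like dashed form. A standalone sketch of the same try-each-format fallback using only the standard library (the name `parse_cvs_date` is hypothetical; Mercurial's `dateutil.parsedate` also handles the appended timezone):

    import calendar
    import time

    FORMATS = ("%y/%m/%d %H:%M:%S", "%Y/%m/%d %H:%M:%S", "%Y-%m-%d %H:%M:%S")

    def parse_cvs_date(d):
        """Return a UTC timestamp; cvs log dates are always GMT."""
        for fmt in FORMATS:
            try:
                return calendar.timegm(time.strptime(d, fmt))
            except ValueError:
                continue  # wrong format for this vintage, try the next
        raise ValueError("unparseable cvs date: %s" % d)

    assert parse_cvs_date("2019/10/06 09:45:02") == parse_cvs_date(
        "2019-10-06 09:45:02"
    )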
@@ -359,18 +378,19 @@
else:
e.lines = None
- if match.group(7): # cvs 1.12 commitid
+ if match.group(7): # cvs 1.12 commitid
e.commitid = match.group(8)
- if match.group(9): # cvsnt mergepoint
+ if match.group(9): # cvsnt mergepoint
myrev = match.group(10).split('.')
- if len(myrev) == 2: # head
+ if len(myrev) == 2: # head
e.mergepoint = 'HEAD'
else:
myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]])
branches = [b for b in branchmap if branchmap[b] == myrev]
- assert len(branches) == 1, ('unknown branch: %s'
- % e.mergepoint)
+ assert len(branches) == 1, (
+ 'unknown branch: %s' % match.group(10)
+ )
e.mergepoint = branches[0]
e.comment = []
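The `'.'.join(myrev[:-2] + ['0', myrev[-2]])` shuffle maps a revision on a branch back to the "magic branch number" CVS records in the symbolic-names table: revision 1.2.4.5 lives on branch 1.2.4, which the tag table stores as 1.2.0.4. Worked through with illustrative values:

    myrev = "1.2.4.5".split(".")
    magic = ".".join(myrev[:-2] + ["0", myrev[-2]])
    assert magic == "1.2.0.4"  # CVS's "magic" form of branch 1.2.4

    branchmap = {"MYBRANCH": "1.2.0.4"}  # hypothetical symbolic-names entry
    assert [b for b in branchmap if branchmap[b] == magic] == ["MYBRANCH"]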
@@ -381,8 +401,10 @@
# or store the commit log message otherwise
m = re_70.match(line)
if m:
- e.branches = [tuple([int(y) for y in x.strip().split('.')])
- for x in m.group(1).split(';')]
+ e.branches = [
+ tuple([int(y) for y in x.strip().split('.')])
+ for x in m.group(1).split(';')
+ ]
state = 8
elif re_31.match(line) and re_50.match(peek):
state = 5
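`re_70` captures the `branches:` line listing the first revision of each branch that forks off the current one; splitting on `;` and reusing the int-tuple conversion gives, for example:

    captured = "1.2.2;  1.2.4"  # group(1) for "branches:  1.2.2;  1.2.4;"
    branches = [
        tuple(int(y) for y in x.strip().split(".")) for x in captured.split(";")
    ]
    assert branches == [(1, 2, 2), (1, 2, 4)]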
@@ -417,13 +439,16 @@
# creates a synthetic dead revision 1.1.x.1 on B2. Don't drop
# these revisions now, but mark them synthetic so
# createchangeset() can take care of them.
- if (store and
- e.dead and
- e.revision[-1] == 1 and # 1.1 or 1.1.x.1
- len(e.comment) == 1 and
- file_added_re.match(e.comment[0])):
- ui.debug('found synthetic revision in %s: %r\n'
- % (e.rcs, e.comment[0]))
+ if (
+ store
+ and e.dead
+ and e.revision[-1] == 1  # 1.1 or 1.1.x.1
+ and len(e.comment) == 1
+ and file_added_re.match(e.comment[0])
+ ):
+ ui.debug(
+ 'found synthetic revision in %s: %r\n' % (e.rcs, e.comment[0])
+ )
e.synthetic = True
if store:
@@ -442,13 +467,13 @@
branchpoints = set()
for branch, revision in branchmap.iteritems():
revparts = tuple([int(i) for i in revision.split('.')])
- if len(revparts) < 2: # bad tags
+ if len(revparts) < 2: # bad tags
continue
if revparts[-2] == 0 and revparts[-1] % 2 == 0:
# normal branch
if revparts[:-2] == e.revision:
branchpoints.add(branch)
- elif revparts == (1, 1, 1): # vendor branch
+ elif revparts == (1, 1, 1): # vendor branch
if revparts in e.branches:
branchpoints.add(branch)
e.branchpoints = branchpoints
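The branchpoint test applies the magic-number convention in reverse: a tag revision such as (1, 3, 0, 2), with a zero in the next-to-last slot and an even final component, names a branch rooted at revision 1.3, while (1, 1, 1) is the conventional vendor branch. A quick check of the normal-branch case:

    revparts = (1, 3, 0, 2)
    assert revparts[-2] == 0 and revparts[-1] % 2 == 0  # magic branch tag
    assert revparts[:-2] == (1, 3)  # the branch starts at revision 1.3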
@@ -458,8 +483,9 @@
rcsmap[e.rcs.replace('/Attic/', '/')] = e.rcs
if len(log) % 100 == 0:
- ui.status(stringutil.ellipsis('%d %s' % (len(log), e.file), 80)
- + '\n')
+ ui.status(
+ stringutil.ellipsis('%d %s' % (len(log), e.file), 80) + '\n'
+ )
log.sort(key=lambda x: (x.rcs, x.revision))
@@ -487,8 +513,12 @@
log.sort(key=lambda x: x.date)
if oldlog and oldlog[-1].date >= log[0].date:
- raise logerror(_('log cache overlaps with new log entries,'
- ' re-run without cache.'))
+ raise logerror(
+ _(
+ 'log cache overlaps with new log entries,'
+ ' re-run without cache.'
+ )
+ )
log = oldlog + log
@@ -502,6 +532,7 @@
encodings = ui.configlist('convert', 'cvsps.logencoding')
if encodings:
+
def revstr(r):
# this is needed, because logentry.revision is a tuple of "int"
# (e.g. (1, 2) for "1.2")
@@ -511,24 +542,33 @@
comment = entry.comment
for e in encodings:
try:
- entry.comment = comment.decode(
- pycompat.sysstr(e)).encode('utf-8')
+ entry.comment = comment.decode(pycompat.sysstr(e)).encode(
+ 'utf-8'
+ )
if ui.debugflag:
- ui.debug("transcoding by %s: %s of %s\n" %
- (e, revstr(entry.revision), entry.file))
+ ui.debug(
+ "transcoding by %s: %s of %s\n"
+ % (e, revstr(entry.revision), entry.file)
+ )
break
except UnicodeDecodeError:
- pass # try next encoding
- except LookupError as inst: # unknown encoding, maybe
- raise error.Abort(inst,
- hint=_('check convert.cvsps.logencoding'
- ' configuration'))
+ pass # try next encoding
+ except LookupError as inst: # unknown encoding, maybe
+ raise error.Abort(
+ inst,
+ hint=_(
+ 'check convert.cvsps.logencoding configuration'
+ ),
+ )
else:
- raise error.Abort(_("no encoding can transcode"
- " CVS log message for %s of %s")
- % (revstr(entry.revision), entry.file),
- hint=_('check convert.cvsps.logencoding'
- ' configuration'))
+ raise error.Abort(
+ _(
+ "no encoding can transcode"
+ " CVS log message for %s of %s"
+ )
+ % (revstr(entry.revision), entry.file),
+ hint=_('check convert.cvsps.logencoding configuration'),
+ )
hook.hook(ui, None, "cvslog", True, log=log)
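The transcoding block leans on Python's `for`/`else`: the `else` arm runs only if no `break` fired, i.e. when every configured encoding failed. A standalone sketch of the fallback (the function name and encodings are examples, not the `convert.cvsps.logencoding` defaults):

    def transcode(comment, encodings=("utf-8", "latin-1")):
        """Re-encode as UTF-8 using the first encoding that can decode."""
        for enc in encodings:
            try:
                result = comment.decode(enc).encode("utf-8")
                break
            except UnicodeDecodeError:
                continue  # try the next configured encoding
        else:  # no break: nothing could decode the comment
            raise ValueError("no encoding can transcode %r" % comment)
        return result

    assert transcode(b"caf\xe9") == b"caf\xc3\xa9"  # decoded via latin-1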
@@ -550,14 +590,16 @@
.mergepoint- the branch that has been merged from or None
.branchpoints- the branches that start at the current entry or empty
'''
+
def __init__(self, **entries):
self.id = None
self.synthetic = False
self.__dict__.update(entries)
def __repr__(self):
- items = ("%s=%r"%(k, self.__dict__[k]) for k in sorted(self.__dict__))
- return "%s(%s)"%(type(self).__name__, ", ".join(items))
+ items = ("%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__))
+ return "%s(%s)" % (type(self).__name__, ", ".join(items))
+
def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
'''Convert log into changesets.'''
@@ -574,9 +616,17 @@
mindate[e.commitid] = min(e.date, mindate[e.commitid])
# Merge changesets
- log.sort(key=lambda x: (mindate.get(x.commitid, (-1, 0)),
- x.commitid or '', x.comment,
- x.author, x.branch or '', x.date, x.branchpoints))
+ log.sort(
+ key=lambda x: (
+ mindate.get(x.commitid, (-1, 0)),
+ x.commitid or '',
+ x.comment,
+ x.author,
+ x.branch or '',
+ x.date,
+ x.branchpoints,
+ )
+ )
changesets = []
files = set()
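The multi-field sort key is what lets the single pass below merge entries: candidates for one changeset become adjacent. Entries sharing a commitid group at that commitid's earliest date (`mindate`); entries without commitids group by comment, author and branch. A reduced sketch with dict stand-ins for logentry (fields trimmed to the ones that matter here):

    entries = [
        {"commitid": "c1", "comment": "fix", "author": "a", "date": (200, 0)},
        {"commitid": None, "comment": "doc", "author": "a", "date": (50, 0)},
        {"commitid": "c1", "comment": "fix", "author": "a", "date": (100, 0)},
    ]
    mindate = {"c1": (100, 0)}  # earliest date per commitid, computed above

    entries.sort(
        key=lambda x: (
            mindate.get(x["commitid"], (-1, 0)),  # no-commitid entries first
            x["commitid"] or "",
            x["comment"],
            x["author"],
            x["date"],
        )
    )
    assert [e["date"] for e in entries] == [(50, 0), (100, 0), (200, 0)]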
@@ -599,22 +649,35 @@
# first changeset and bar the next and MYBRANCH and MYBRANCH2
# should both start off of the bar changeset. No provisions are
# made to ensure that this is, in fact, what happens.
- if not (c and e.branchpoints == c.branchpoints and
- (# cvs commitids
- (e.commitid is not None and e.commitid == c.commitid) or
- (# no commitids, use fuzzy commit detection
- (e.commitid is None or c.commitid is None) and
- e.comment == c.comment and
- e.author == c.author and
- e.branch == c.branch and
- ((c.date[0] + c.date[1]) <=
- (e.date[0] + e.date[1]) <=
- (c.date[0] + c.date[1]) + fuzz) and
- e.file not in files))):
- c = changeset(comment=e.comment, author=e.author,
- branch=e.branch, date=e.date,
- entries=[], mergepoint=e.mergepoint,
- branchpoints=e.branchpoints, commitid=e.commitid)
+ if not (
+ c
+ and e.branchpoints == c.branchpoints
+ and ( # cvs commitids
+ (e.commitid is not None and e.commitid == c.commitid)
+ or ( # no commitids, use fuzzy commit detection
+ (e.commitid is None or c.commitid is None)
+ and e.comment == c.comment
+ and e.author == c.author
+ and e.branch == c.branch
+ and (
+ (c.date[0] + c.date[1])
+ <= (e.date[0] + e.date[1])
+ <= (c.date[0] + c.date[1]) + fuzz
+ )
+ and e.file not in files
+ )
+ )
+ ):
+ c = changeset(
+ comment=e.comment,
+ author=e.author,
+ branch=e.branch,
+ date=e.date,
+ entries=[],
+ mergepoint=e.mergepoint,
+ branchpoints=e.branchpoints,
+ commitid=e.commitid,
+ )
changesets.append(c)
files = set()
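Without commitids, a log entry joins the current changeset only when comment, author and branch match, its file has not been seen in the changeset yet, and its date falls within a `fuzz`-second window after the changeset's date. The window test in isolation, with invented numbers (dates are (timestamp, tz-offset) pairs, hence the sums):

    fuzz = 60
    c_date = (1000, 0)  # date of the changeset collected so far
    e_date = (1045, 0)  # candidate log entry

    assert (
        (c_date[0] + c_date[1])
        <= (e_date[0] + e_date[1])
        <= (c_date[0] + c_date[1]) + fuzz
    )  # 1045 falls within 60 seconds after 1000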
@@ -624,7 +687,7 @@
c.entries.append(e)
files.add(e.file)
- c.date = e.date # changeset date is date of latest commit in it
+ c.date = e.date # changeset date is date of latest commit in it
# Mark synthetic changesets
@@ -665,6 +728,7 @@
# Sort changesets by date
odd = set()
+
def cscmp(l, r):
d = sum(l.date) - sum(r.date)
if d:
@@ -745,8 +809,8 @@
if mergefrom:
mergefrom = re.compile(mergefrom)
- versions = {} # changeset index where we saw any particular file version
- branches = {} # changeset index where we saw a branch
+ versions = {} # changeset index where we saw any particular file version
+ branches = {} # changeset index where we saw a branch
n = len(changesets)
i = 0
while i < n:
@@ -777,8 +841,9 @@
# Ensure no changeset has a synthetic changeset as a parent.
while p.synthetic:
- assert len(p.parents) <= 1, (
- _('synthetic changeset cannot have multiple parents'))
+ assert len(p.parents) <= 1, _(
+ 'synthetic changeset cannot have multiple parents'
+ )
if p.parents:
p = p.parents[0]
else:
@@ -802,9 +867,13 @@
try:
candidate = changesets[branches[m]]
except KeyError:
- ui.warn(_("warning: CVS commit message references "
- "non-existent branch %r:\n%s\n")
- % (pycompat.bytestr(m), c.comment))
+ ui.warn(
+ _(
+ "warning: CVS commit message references "
+ "non-existent branch %r:\n%s\n"
+ )
+ % (pycompat.bytestr(m), c.comment)
+ )
if m in branches and c.branch != m and not candidate.synthetic:
c.parents.append(candidate)
@@ -816,15 +885,19 @@
if m == 'HEAD':
m = None
else:
- m = None # if no group found then merge to HEAD
+ m = None # if no group found then merge to HEAD
if m in branches and c.branch != m:
# insert empty changeset for merge
cc = changeset(
- author=c.author, branch=m, date=c.date,
+ author=c.author,
+ branch=m,
+ date=c.date,
comment='convert-repo: CVS merge from branch %s'
% c.branch,
- entries=[], tags=[],
- parents=[changesets[branches[m]], c])
+ entries=[],
+ tags=[],
+ parents=[changesets[branches[m]], c],
+ )
changesets.insert(i + 1, cc)
branches[m] = i + 1
@@ -853,8 +926,10 @@
if odd:
for l, r in odd:
if l.id is not None and r.id is not None:
- ui.warn(_('changeset %d is both before and after %d\n')
- % (l.id, r.id))
+ ui.warn(
+ _('changeset %d is both before and after %d\n')
+ % (l.id, r.id)
+ )
ui.status(_('%d changeset entries\n') % len(changesets))
@@ -886,7 +961,7 @@
else:
log = createlog(ui, root=opts["root"], cache=cache)
except logerror as e:
- ui.write("%r\n"%e)
+ ui.write("%r\n" % e)
return
changesets = createchangeset(ui, log, opts["fuzz"])
@@ -895,14 +970,16 @@
# Print changesets (optionally filtered)
off = len(revisions)
- branches = {} # latest version number in each branch
- ancestors = {} # parent branch
+ branches = {} # latest version number in each branch
+ ancestors = {} # parent branch
for cs in changesets:
if opts["ancestors"]:
if cs.branch not in branches and cs.parents and cs.parents[0].id:
- ancestors[cs.branch] = (changesets[cs.parents[0].id - 1].branch,
- cs.parents[0].id)
+ ancestors[cs.branch] = (
+ changesets[cs.parents[0].id - 1].branch,
+ cs.parents[0].id,
+ )
branches[cs.branch] = cs.id
# limit by branches
@@ -914,19 +991,35 @@
# bug-for-bug compatibility with cvsps.
ui.write('---------------------\n')
ui.write(('PatchSet %d \n' % cs.id))
- ui.write(('Date: %s\n' % dateutil.datestr(cs.date,
- '%Y/%m/%d %H:%M:%S %1%2')))
+ ui.write(
+ (
+ 'Date: %s\n'
+ % dateutil.datestr(cs.date, '%Y/%m/%d %H:%M:%S %1%2')
+ )
+ )
ui.write(('Author: %s\n' % cs.author))
ui.write(('Branch: %s\n' % (cs.branch or 'HEAD')))
- ui.write(('Tag%s: %s \n' % (['', 's'][len(cs.tags) > 1],
- ','.join(cs.tags) or '(none)')))
+ ui.write(
+ (
+ 'Tag%s: %s \n'
+ % (
+ ['', 's'][len(cs.tags) > 1],
+ ','.join(cs.tags) or '(none)',
+ )
+ )
+ )
if cs.branchpoints:
- ui.write(('Branchpoints: %s \n') %
- ', '.join(sorted(cs.branchpoints)))
+ ui.write(
+ 'Branchpoints: %s \n' % ', '.join(sorted(cs.branchpoints))
+ )
if opts["parents"] and cs.parents:
if len(cs.parents) > 1:
- ui.write(('Parents: %s\n' %
- (','.join([(b"%d" % p.id) for p in cs.parents]))))
+ ui.write(
+ (
+ 'Parents: %s\n'
+ % (','.join([(b"%d" % p.id) for p in cs.parents]))
+ )
+ )
else:
ui.write(('Parent: %d\n' % cs.parents[0].id))
@@ -939,28 +1032,30 @@
if r:
ui.write(('Ancestors: %s\n' % (','.join(r))))
- ui.write(('Log:\n'))
+ ui.write('Log:\n')
ui.write('%s\n\n' % cs.comment)
- ui.write(('Members: \n'))
+ ui.write('Members: \n')
for f in cs.entries:
fn = f.file
if fn.startswith(opts["prefix"]):
- fn = fn[len(opts["prefix"]):]
- ui.write('\t%s:%s->%s%s \n' % (
+ fn = fn[len(opts["prefix"]) :]
+ ui.write(
+ '\t%s:%s->%s%s \n'
+ % (
fn,
'.'.join([b"%d" % x for x in f.parent]) or 'INITIAL',
'.'.join([(b"%d" % x) for x in f.revision]),
- ['', '(DEAD)'][f.dead]))
+ ['', '(DEAD)'][f.dead],
+ )
+ )
ui.write('\n')
# have we seen the start tag?
if revisions and off:
- if (revisions[0] == (b"%d" % cs.id) or
- revisions[0] in cs.tags):
+ if revisions[0] == (b"%d" % cs.id) or revisions[0] in cs.tags:
off = False
# see if we reached the end tag
if len(revisions) > 1 and not off:
- if (revisions[1] == (b"%d" % cs.id) or
- revisions[1] in cs.tags):
+ if revisions[1] == (b"%d" % cs.id) or revisions[1] in cs.tags:
break
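One last note on the output code above: subscripts such as `['', 's'][len(cs.tags) > 1]` and `['', '(DEAD)'][f.dead]` are the old bool-as-index idiom, preserved for bug-for-bug output compatibility with the original cvsps script; `False` selects element 0 and `True` element 1:

    tags = ["v1", "v2"]
    assert "Tag%s" % ["", "s"][len(tags) > 1] == "Tags"
    assert ["", "(DEAD)"][False] == ""
    assert ["", "(DEAD)"][True] == "(DEAD)"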