Move convert-repo to hgext/convert/__init__.py
author Thomas Arendsen Hein <thomas@intevation.de>
Wed, 06 Jun 2007 19:49:47 +0200
changeset 4512 91709ba3cc88
parent 4511 1d46169ec197
child 4513 ac2fe196ac9b
Move convert-repo to hgext/convert/__init__.py
contrib/convert-repo
hgext/convert/__init__.py
--- a/contrib/convert-repo	Wed Jun 06 19:06:43 2007 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,731 +0,0 @@
-#!/usr/bin/env python
-#
-# This is a generalized framework for converting between SCM
-# repository formats.
-#
-# To use, run:
-#
-# convert-repo <source> [<dest> [<mapfile>]]
-#
-# Currently accepted source formats: git, cvs
-# Currently accepted destination formats: hg
-#
-# If destination isn't given, a new Mercurial repo named <src>-hg will
-# be created. If <mapfile> isn't given, it will be put in a default
-# location (<dest>/.hg/shamap by default)
-#
-# The <mapfile> is a simple text file that maps each source commit ID to
-# the destination ID for that revision, like so:
-#
-# <source ID> <destination ID>
-#
-# If the file doesn't exist, it's automatically created.  It's updated
-# on each commit copied, so convert-repo can be interrupted and can
-# be run repeatedly to copy new commits.
-
-import sys, os, zlib, sha, time, re, locale, socket
-os.environ["HGENCODING"] = "utf-8"
-from mercurial import hg, ui, util, fancyopts
-
-class Abort(Exception): pass
-class NoRepo(Exception): pass
-
-class commit(object):
-    def __init__(self, **parts):
-        for x in "author date desc parents".split():
-            if not x in parts:
-                abort("commit missing field %s\n" % x)
-        self.__dict__.update(parts)
-
-quiet = 0
-def status(msg):
-    if not quiet: sys.stdout.write(str(msg))
-
-def warn(msg):
-    sys.stderr.write(str(msg))
-
-def abort(msg):
-    raise Abort(msg)
-
-def recode(s):
-    try:
-        return s.decode("utf-8").encode("utf-8")
-    except:
-        try:
-            return s.decode("latin-1").encode("utf-8")
-        except:
-            return s.decode("utf-8", "replace").encode("utf-8")
-
-class converter_source(object):
-    """Conversion source interface"""
-
-    def __init__(self, path):
-        """Initialize conversion source (or raise NoRepo("message")
-        exception if path is not a valid repository)"""
-        raise NotImplementedError()
-
-    def getheads(self):
-        """Return a list of this repository's heads"""
-        raise NotImplementedError()
-
-    def getfile(self, name, rev):
-        """Return file contents as a string"""
-        raise NotImplementedError()
-
-    def getmode(self, name, rev):
-        """Return file mode, eg. '', 'x', or 'l'"""
-        raise NotImplementedError()
-
-    def getchanges(self, version):
-        """Return sorted list of (filename, id) tuples for all files changed in rev.
-        
-        id just tells us which revision to return in getfile(), e.g. in
-        git it's an object hash."""
-        raise NotImplementedError()
-
-    def getcommit(self, version):
-        """Return the commit object for version"""
-        raise NotImplementedError()
-
-    def gettags(self):
-        """Return the tags as a dictionary of name: revision"""
-        raise NotImplementedError()
-
-class converter_sink(object):
-    """Conversion sink (target) interface"""
-
-    def __init__(self, path):
-        """Initialize conversion sink (or raise NoRepo("message")
-        exception if path is not a valid repository)"""
-        raise NotImplementedError()
-
-    def getheads(self):
-        """Return a list of this repository's heads"""
-        raise NotImplementedError()
-
-    def mapfile(self):
-        """Path to a file that will contain lines
-        source_rev_id sink_rev_id
-        mapping equivalent revision identifiers for each system."""
-        raise NotImplementedError()
-
-    def putfile(self, f, e, data):
-        """Put file for next putcommit().
-        f: path to file
-        e: '', 'x', or 'l' (regular file, executable, or symlink)
-        data: file contents"""
-        raise NotImplementedError()
-
-    def delfile(self, f):
-        """Delete file for next putcommit().
-        f: path to file"""
-        raise NotImplementedError()
-
-    def putcommit(self, files, parents, commit):
-        """Create a revision with all changed files listed in 'files'
-        and having listed parents. 'commit' is a commit object containing
-        at a minimum the author, date, and message for this changeset.
-        Called after putfile() and delfile() calls. Note that the sink
-        repository is not told to update itself to a particular revision
-        (or even what that revision would be) before it receives the
-        file data."""
-        raise NotImplementedError()
-
-    def puttags(self, tags):
-        """Put tags into sink.
-        tags: {tagname: sink_rev_id, ...}"""
-        raise NotImplementedError()
-
-
-# CVS conversion code inspired by hg-cvs-import and git-cvsimport
-class convert_cvs(converter_source):
-    def __init__(self, path):
-        self.path = path
-        cvs = os.path.join(path, "CVS")
-        if not os.path.exists(cvs):
-            raise NoRepo("couldn't open CVS repo %s" % path)
-
-        self.changeset = {}
-        self.files = {}
-        self.tags = {}
-        self.lastbranch = {}
-        self.parent = {}
-        self.socket = None
-        self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
-        self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
-        self.encoding = locale.getpreferredencoding()
-        self._parse()
-        self._connect()
-
-    def _parse(self):
-        if self.changeset:
-            return
-
-        d = os.getcwd()
-        try:
-            os.chdir(self.path)
-            id = None
-            state = 0
-            for l in os.popen("cvsps -A -u --cvs-direct -q"):
-                if state == 0: # header
-                    if l.startswith("PatchSet"):
-                        id = l[9:-2]
-                    elif l.startswith("Date"):
-                        date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
-                        date = util.datestr(date)
-                    elif l.startswith("Branch"):
-                        branch = l[8:-1]
-                        self.parent[id] = self.lastbranch.get(branch,'bad')
-                        self.lastbranch[branch] = id
-                    elif l.startswith("Ancestor branch"):
-                        ancestor = l[17:-1]
-                        self.parent[id] = self.lastbranch[ancestor]
-                    elif l.startswith("Author"):
-                        author = self.recode(l[8:-1])
-                    elif l.startswith("Tag: "):
-                        t = l[5:-1].rstrip()
-                        if t != "(none)":
-                            self.tags[t] = id
-                    elif l.startswith("Log:"):
-                        state = 1
-                        log = ""
-                elif state == 1: # log
-                    if l == "Members: \n":
-                        files = {}
-                        log = self.recode(log[:-1])
-                        if log.isspace():
-                            log = "*** empty log message ***\n"
-                        state = 2
-                    else:
-                        log += l
-                elif state == 2:
-                    if l == "\n": #
-                        state = 0
-                        p = [self.parent[id]]
-                        if id == "1":
-                            p = []
-                        c = commit(author=author, date=date, parents=p,
-                                   desc=log, branch=branch)
-                        self.changeset[id] = c
-                        self.files[id] = files
-                    else:
-                        file,rev = l[1:-2].rsplit(':',1)
-                        rev = rev.split("->")[1]
-                        files[file] = rev
-
-            self.heads = self.lastbranch.values()
-        finally:
-            os.chdir(d)
-
-    def _connect(self):
-        root = self.cvsroot
-        conntype = None
-        user, host = None, None
-        cmd = ['cvs', 'server']
-
-        status("connecting to %s\n" % root)
-
-        if root.startswith(":pserver:"):
-            root = root[9:]
-            m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)', root)
-            if m:
-                conntype = "pserver"
-                user, passw, serv, port, root = m.groups()
-                if not user:
-                    user = "anonymous"
-                rr = ":pserver:" + user + "@" + serv + ":" +  root
-                if port:
-                    rr2, port = "-", int(port)
-                else:
-                    rr2, port = rr, 2401
-                rr += str(port)
-
-                if not passw:
-                    passw = "A"
-                    pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
-                    for l in pf:
-                        # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
-                        m = re.match(r'(/\d+\s+/)?(.*)', l)
-                        l = m.group(2)
-                        w, p = l.split(' ', 1)
-                        if w in [rr, rr2]:
-                            passw = p
-                            break
-                    pf.close()
-
-                sck = socket.socket()
-                sck.connect((serv, port))
-                sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw, "END AUTH REQUEST", ""]))
-                if sck.recv(128) != "I LOVE YOU\n":
-                    raise NoRepo("CVS pserver authentication failed")
-
-                self.writep = self.readp = sck.makefile('r+')
-
-        if not conntype and root.startswith(":local:"):
-            conntype = "local"
-            root = root[7:]
-
-        if not conntype:
-            # :ext:user@host/home/user/path/to/cvsroot
-            if root.startswith(":ext:"):
-                root = root[5:]
-            m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
-            if not m:
-                conntype = "local"
-            else:
-                conntype = "rsh"
-                user, host, root = m.group(1), m.group(2), m.group(3)
-
-        if conntype != "pserver":
-            if conntype == "rsh": 
-                rsh = os.environ.get("CVS_RSH" or "rsh")
-                if user:
-                    cmd = [rsh, '-l', user, host] + cmd
-                else:
-                    cmd = [rsh, host] + cmd
-
-            self.writep, self.readp = os.popen2(cmd)
-
-        self.realroot = root
-
-        self.writep.write("Root %s\n" % root)
-        self.writep.write("Valid-responses ok error Valid-requests Mode"
-                          " M Mbinary E Checked-in Created Updated"
-                          " Merged Removed\n")
-        self.writep.write("valid-requests\n")
-        self.writep.flush()
-        r = self.readp.readline()
-        if not r.startswith("Valid-requests"):
-            abort("server sucks\n")
-        if "UseUnchanged" in r:
-            self.writep.write("UseUnchanged\n")
-            self.writep.flush()
-            r = self.readp.readline()
-
-    def getheads(self):
-        return self.heads
-
-    def _getfile(self, name, rev):
-        if rev.endswith("(DEAD)"):
-            raise IOError
-
-        args = ("-N -P -kk -r %s --" % rev).split()
-        args.append(os.path.join(self.cvsrepo, name))
-        for x in args:
-            self.writep.write("Argument %s\n" % x)
-        self.writep.write("Directory .\n%s\nco\n" % self.realroot)
-        self.writep.flush()
-
-        data = ""
-        while 1:
-            line = self.readp.readline()
-            if line.startswith("Created ") or line.startswith("Updated "):
-                self.readp.readline() # path
-                self.readp.readline() # entries
-                mode = self.readp.readline()[:-1]
-                count = int(self.readp.readline()[:-1])
-                data = self.readp.read(count)
-            elif line.startswith(" "):
-                data += line[1:]
-            elif line.startswith("M "):
-                pass
-            elif line.startswith("Mbinary "):
-                count = int(self.readp.readline()[:-1])
-                data = self.readp.read(count)
-            else:
-                if line == "ok\n":
-                    return (data, "x" in mode and "x" or "")
-                elif line.startswith("E "):
-                    warn("cvs server: %s\n" % line[2:])
-                elif line.startswith("Remove"):
-                    l = self.readp.readline()
-                    l = self.readp.readline()
-                    if l != "ok\n":
-                        abort("unknown CVS response: %s\n" % l)
-                else:
-                    abort("unknown CVS response: %s\n" % line)
-
-    def getfile(self, file, rev):
-        data, mode = self._getfile(file, rev)
-        self.modecache[(file, rev)] = mode
-        return data
-
-    def getmode(self, file, rev):
-        return self.modecache[(file, rev)]
-
-    def getchanges(self, rev):
-        self.modecache = {}
-        files = self.files[rev]
-        cl = files.items()
-        cl.sort()
-        return cl
-
-    def recode(self, text):
-        return text.decode(self.encoding, "replace").encode("utf-8")
-
-    def getcommit(self, rev):
-        return self.changeset[rev]
-
-    def gettags(self):
-        return self.tags
-
-class convert_git(converter_source):
-    def __init__(self, path):
-        if os.path.isdir(path + "/.git"):
-            path += "/.git"
-        self.path = path
-        if not os.path.exists(path + "/objects"):
-            raise NoRepo("couldn't open GIT repo %s" % path)
-
-    def getheads(self):
-        fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
-        return [fh.read()[:-1]]
-
-    def catfile(self, rev, type):
-        if rev == "0" * 40: raise IOError()
-        fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null" % (self.path, type, rev))
-        return fh.read()
-
-    def getfile(self, name, rev):
-        return self.catfile(rev, "blob")
-
-    def getmode(self, name, rev):
-        return self.modecache[(name, rev)]
-
-    def getchanges(self, version):
-        self.modecache = {}
-        fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s" % (self.path, version))
-        changes = []
-        for l in fh:
-            if "\t" not in l: continue
-            m, f = l[:-1].split("\t")
-            m = m.split()
-            h = m[3]
-            p = (m[1] == "100755")
-            s = (m[1] == "120000")
-            self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
-            changes.append((f, h))
-        return changes
-
-    def getcommit(self, version):
-        c = self.catfile(version, "commit") # read the commit hash
-        end = c.find("\n\n")
-        message = c[end+2:]
-        message = recode(message)
-        l = c[:end].splitlines()
-        manifest = l[0].split()[1]
-        parents = []
-        for e in l[1:]:
-            n,v = e.split(" ", 1)
-            if n == "author":
-                p = v.split()
-                tm, tz = p[-2:]
-                author = " ".join(p[:-2])
-                if author[0] == "<": author = author[1:-1]
-                author = recode(author)
-            if n == "committer":
-                p = v.split()
-                tm, tz = p[-2:]
-                committer = " ".join(p[:-2])
-                if committer[0] == "<": committer = committer[1:-1]
-                committer = recode(committer)
-                message += "\ncommitter: %s\n" % committer
-            if n == "parent": parents.append(v)
-
-        tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
-        tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
-        date = tm + " " + str(tz)
-
-        c = commit(parents=parents, date=date, author=author, desc=message)
-        return c
-
-    def gettags(self):
-        tags = {}
-        fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
-        prefix = 'refs/tags/'
-        for line in fh:
-            line = line.strip()
-            if not line.endswith("^{}"):
-                continue
-            node, tag = line.split(None, 1)
-            if not tag.startswith(prefix):
-                continue
-            tag = tag[len(prefix):-3]
-            tags[tag] = node
-
-        return tags
-
-class convert_mercurial(converter_sink):
-    def __init__(self, path):
-        self.path = path
-        u = ui.ui()
-        try:
-            self.repo = hg.repository(u, path)
-        except:
-            raise NoRepo("could open hg repo %s" % path)
-
-    def mapfile(self):
-        return os.path.join(self.path, ".hg", "shamap")
-
-    def getheads(self):
-        h = self.repo.changelog.heads()
-        return [ hg.hex(x) for x in h ]
-
-    def putfile(self, f, e, data):
-        self.repo.wwrite(f, data, e)
-        if self.repo.dirstate.state(f) == '?':
-            self.repo.dirstate.update([f], "a")
-
-    def delfile(self, f):
-        try:
-            os.unlink(self.repo.wjoin(f))
-            #self.repo.remove([f])
-        except:
-            pass
-
-    def putcommit(self, files, parents, commit):
-        seen = {}
-        pl = []
-        for p in parents:
-            if p not in seen:
-                pl.append(p)
-                seen[p] = 1
-        parents = pl
-
-        if len(parents) < 2: parents.append("0" * 40)
-        if len(parents) < 2: parents.append("0" * 40)
-        p2 = parents.pop(0)
-
-        text = commit.desc
-        extra = {}
-        try:
-            extra["branch"] = commit.branch
-        except AttributeError:
-            pass
-
-        while parents:
-            p1 = p2
-            p2 = parents.pop(0)
-            a = self.repo.rawcommit(files, text, commit.author, commit.date,
-                                    hg.bin(p1), hg.bin(p2), extra=extra)
-            text = "(octopus merge fixup)\n"
-            p2 = hg.hex(self.repo.changelog.tip())
-
-        return p2
-
-    def puttags(self, tags):
-        try:
-            old = self.repo.wfile(".hgtags").read()
-            oldlines = old.splitlines(1)
-            oldlines.sort()
-        except:
-            oldlines = []
-
-        k = tags.keys()
-        k.sort()
-        newlines = []
-        for tag in k:
-            newlines.append("%s %s\n" % (tags[tag], tag))
-
-        newlines.sort()
-
-        if newlines != oldlines:
-            status("updating tags\n")
-            f = self.repo.wfile(".hgtags", "w")
-            f.write("".join(newlines))
-            f.close()
-            if not oldlines: self.repo.add([".hgtags"])
-            date = "%s 0" % int(time.mktime(time.gmtime()))
-            self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
-                                date, self.repo.changelog.tip(), hg.nullid)
-            return hg.hex(self.repo.changelog.tip())
-
-converters = [convert_cvs, convert_git, convert_mercurial]
-
-def converter(path):
-    if not os.path.isdir(path):
-        abort("%s: not a directory\n" % path)
-    for c in converters:
-        try:
-            return c(path)
-        except NoRepo:
-            pass
-    abort("%s: unknown repository type\n" % path)
-
-class convert(object):
-    def __init__(self, source, dest, mapfile, opts):
-
-        self.source = source
-        self.dest = dest
-        self.mapfile = mapfile
-        self.opts = opts
-        self.commitcache = {}
-
-        self.map = {}
-        try:
-            for l in file(self.mapfile):
-                sv, dv = l[:-1].split()
-                self.map[sv] = dv
-        except IOError:
-            pass
-
-    def walktree(self, heads):
-        visit = heads
-        known = {}
-        parents = {}
-        while visit:
-            n = visit.pop(0)
-            if n in known or n in self.map: continue
-            known[n] = 1
-            self.commitcache[n] = self.source.getcommit(n)
-            cp = self.commitcache[n].parents
-            for p in cp:
-                parents.setdefault(n, []).append(p)
-                visit.append(p)
-
-        return parents
-
-    def toposort(self, parents):
-        visit = parents.keys()
-        seen = {}
-        children = {}
-
-        while visit:
-            n = visit.pop(0)
-            if n in seen: continue
-            seen[n] = 1
-            pc = 0
-            if n in parents:
-                for p in parents[n]:
-                    if p not in self.map: pc += 1
-                    visit.append(p)
-                    children.setdefault(p, []).append(n)
-            if not pc: root = n
-
-        s = []
-        removed = {}
-        visit = children.keys()
-        while visit:
-            n = visit.pop(0)
-            if n in removed: continue
-            dep = 0
-            if n in parents:
-                for p in parents[n]:
-                    if p in self.map: continue
-                    if p not in removed:
-                        # we're still dependent
-                        visit.append(n)
-                        dep = 1
-                        break
-
-            if not dep:
-                # all n's parents are in the list
-                removed[n] = 1
-                if n not in self.map:
-                    s.append(n)
-                if n in children:
-                    for c in children[n]:
-                        visit.insert(0, c)
-
-        if opts.get('datesort'):
-            depth = {}
-            for n in s:
-                depth[n] = 0
-                pl = [p for p in self.commitcache[n].parents if p not in self.map]
-                if pl:
-                    depth[n] = max([depth[p] for p in pl]) + 1
-
-            s = [(depth[n], self.commitcache[n].date, n) for n in s]
-            s.sort()
-            s = [e[2] for e in s]
-
-        return s
-
-    def copy(self, rev):
-        c = self.commitcache[rev]
-        files = self.source.getchanges(rev)
-
-        for f,v in files:
-            try:
-                data = self.source.getfile(f, v)
-            except IOError, inst:
-                self.dest.delfile(f)
-            else:
-                e = self.source.getmode(f, v)
-                self.dest.putfile(f, e, data)
-
-        r = [self.map[v] for v in c.parents]
-        f = [f for f,v in files]
-        self.map[rev] = self.dest.putcommit(f, r, c)
-        file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
-
-    def convert(self):
-        status("scanning source...\n")
-        heads = self.source.getheads()
-        parents = self.walktree(heads)
-        status("sorting...\n")
-        t = self.toposort(parents)
-        num = len(t)
-        c = None
-
-        status("converting...\n")
-        for c in t:
-            num -= 1
-            desc = self.commitcache[c].desc
-            if "\n" in desc:
-                desc = desc.splitlines()[0]
-            status("%d %s\n" % (num, desc))
-            self.copy(c)
-
-        tags = self.source.gettags()
-        ctags = {}
-        for k in tags:
-            v = tags[k]
-            if v in self.map:
-                ctags[k] = self.map[v]
-
-        if c and ctags:
-            nrev = self.dest.puttags(ctags)
-            # write another hash correspondence to override the previous
-            # one so we don't end up with extra tag heads
-            if nrev:
-                file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
-
-def command(src, dest=None, mapfile=None, **opts):
-    srcc = converter(src)
-    if not hasattr(srcc, "getcommit"):
-        abort("%s: can't read from this repo type\n" % src)
-
-    if not dest:
-        dest = src + "-hg"
-        status("assuming destination %s\n" % dest)
-        if not os.path.isdir(dest):
-            status("creating repository %s\n" % dest)
-            os.system("hg init " + dest)
-    destc = converter(dest)
-    if not hasattr(destc, "putcommit"):
-        abort("%s: can't write to this repo type\n" % src)
-
-    if not mapfile:
-        try:
-            mapfile = destc.mapfile()
-        except:
-            mapfile = os.path.join(destc, "map")
-
-    c = convert(srcc, destc, mapfile, opts)
-    c.convert()
-
-options = [('q', 'quiet', None, 'suppress output'),
-           ('', 'datesort', None, 'try to sort changesets by date')]
-opts = {}
-args = fancyopts.fancyopts(sys.argv[1:], options, opts)
-
-if opts['quiet']:
-    quiet = 1
-
-try:
-    command(*args, **opts)
-except Abort, inst:
-    warn(inst)
-except KeyboardInterrupt:
-    status("interrupted\n")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/convert/__init__.py	Wed Jun 06 19:49:47 2007 +0200
@@ -0,0 +1,731 @@
+#!/usr/bin/env python
+#
+# This is a generalized framework for converting between SCM
+# repository formats.
+#
+# To use, run:
+#
+# convert-repo <source> [<dest> [<mapfile>]]
+#
+# Currently accepted source formats: git, cvs
+# Currently accepted destination formats: hg
+#
+# If destination isn't given, a new Mercurial repo named <src>-hg will
+# be created. If <mapfile> isn't given, it will be put in a default
+# location (<dest>/.hg/shamap by default)
+#
+# The <mapfile> is a simple text file that maps each source commit ID to
+# the destination ID for that revision, like so:
+#
+# <source ID> <destination ID>
+#
+# If the file doesn't exist, it's automatically created.  It's updated
+# on each commit copied, so convert-repo can be interrupted and can
+# be run repeatedly to copy new commits.
+
+import sys, os, zlib, sha, time, re, locale, socket
+os.environ["HGENCODING"] = "utf-8"
+from mercurial import hg, ui, util, fancyopts
+
+class Abort(Exception): pass
+class NoRepo(Exception): pass
+
+class commit(object):
+    def __init__(self, **parts):
+        for x in "author date desc parents".split():
+            if not x in parts:
+                abort("commit missing field %s\n" % x)
+        self.__dict__.update(parts)
+
+quiet = 0
+def status(msg):
+    if not quiet: sys.stdout.write(str(msg))
+
+def warn(msg):
+    sys.stderr.write(str(msg))
+
+def abort(msg):
+    raise Abort(msg)
+
+def recode(s):
+    try:
+        return s.decode("utf-8").encode("utf-8")
+    except:
+        try:
+            return s.decode("latin-1").encode("utf-8")
+        except:
+            return s.decode("utf-8", "replace").encode("utf-8")
+
+class converter_source(object):
+    """Conversion source interface"""
+
+    def __init__(self, path):
+        """Initialize conversion source (or raise NoRepo("message")
+        exception if path is not a valid repository)"""
+        raise NotImplementedError()
+
+    def getheads(self):
+        """Return a list of this repository's heads"""
+        raise NotImplementedError()
+
+    def getfile(self, name, rev):
+        """Return file contents as a string"""
+        raise NotImplementedError()
+
+    def getmode(self, name, rev):
+        """Return file mode, eg. '', 'x', or 'l'"""
+        raise NotImplementedError()
+
+    def getchanges(self, version):
+        """Return sorted list of (filename, id) tuples for all files changed in rev.
+        
+        id just tells us which revision to return in getfile(), e.g. in
+        git it's an object hash."""
+        raise NotImplementedError()
+
+    def getcommit(self, version):
+        """Return the commit object for version"""
+        raise NotImplementedError()
+
+    def gettags(self):
+        """Return the tags as a dictionary of name: revision"""
+        raise NotImplementedError()
+
+class converter_sink(object):
+    """Conversion sink (target) interface"""
+
+    def __init__(self, path):
+        """Initialize conversion sink (or raise NoRepo("message")
+        exception if path is not a valid repository)"""
+        raise NotImplementedError()
+
+    def getheads(self):
+        """Return a list of this repository's heads"""
+        raise NotImplementedError()
+
+    def mapfile(self):
+        """Path to a file that will contain lines
+        source_rev_id sink_rev_id
+        mapping equivalent revision identifiers for each system."""
+        raise NotImplementedError()
+
+    def putfile(self, f, e, data):
+        """Put file for next putcommit().
+        f: path to file
+        e: '', 'x', or 'l' (regular file, executable, or symlink)
+        data: file contents"""
+        raise NotImplementedError()
+
+    def delfile(self, f):
+        """Delete file for next putcommit().
+        f: path to file"""
+        raise NotImplementedError()
+
+    def putcommit(self, files, parents, commit):
+        """Create a revision with all changed files listed in 'files'
+        and having listed parents. 'commit' is a commit object containing
+        at a minimum the author, date, and message for this changeset.
+        Called after putfile() and delfile() calls. Note that the sink
+        repository is not told to update itself to a particular revision
+        (or even what that revision would be) before it receives the
+        file data."""
+        raise NotImplementedError()
+
+    def puttags(self, tags):
+        """Put tags into sink.
+        tags: {tagname: sink_rev_id, ...}"""
+        raise NotImplementedError()
+
+
+# CVS conversion code inspired by hg-cvs-import and git-cvsimport
+class convert_cvs(converter_source):
+    def __init__(self, path):
+        self.path = path
+        cvs = os.path.join(path, "CVS")
+        if not os.path.exists(cvs):
+            raise NoRepo("couldn't open CVS repo %s" % path)
+
+        self.changeset = {}
+        self.files = {}
+        self.tags = {}
+        self.lastbranch = {}
+        self.parent = {}
+        self.socket = None
+        self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
+        self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
+        self.encoding = locale.getpreferredencoding()
+        self._parse()
+        self._connect()
+
+    def _parse(self):
+        """Fill the changeset tables from the output of cvsps.
+
+        Runs "cvsps -A -u --cvs-direct -q" with self.path as the working
+        directory and walks its output with a three-state machine:
+        0 = patchset header, 1 = log message, 2 = member (file) list.
+        Populates self.changeset, self.files, self.parent, self.tags,
+        self.lastbranch and self.heads.  Returns immediately when
+        self.changeset is already non-empty.  The previous working
+        directory is restored on exit, even if parsing fails.
+        """
+        if self.changeset:
+            return
+
+        d = os.getcwd()
+        try:
+            os.chdir(self.path)
+            id = None
+            state = 0
+            for l in os.popen("cvsps -A -u --cvs-direct -q"):
+                if state == 0: # header
+                    if l.startswith("PatchSet"):
+                        # drop the 9-character prefix and the last two
+                        # characters (presumably a trailing space and the
+                        # newline -- TODO confirm against cvsps output)
+                        id = l[9:-2]
+                    elif l.startswith("Date"):
+                        date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
+                        date = util.datestr(date)
+                    elif l.startswith("Branch"):
+                        branch = l[8:-1]
+                        # parent defaults to the previous patchset seen on
+                        # the same branch; 'bad' marks "none seen yet"
+                        self.parent[id] = self.lastbranch.get(branch,'bad')
+                        self.lastbranch[branch] = id
+                    elif l.startswith("Ancestor branch"):
+                        # first patchset of a new branch: re-parent it onto
+                        # the latest patchset of the ancestor branch
+                        ancestor = l[17:-1]
+                        self.parent[id] = self.lastbranch[ancestor]
+                    elif l.startswith("Author"):
+                        author = self.recode(l[8:-1])
+                    elif l.startswith("Tag: "):
+                        t = l[5:-1].rstrip()
+                        if t != "(none)":
+                            self.tags[t] = id
+                    elif l.startswith("Log:"):
+                        state = 1
+                        log = ""
+                elif state == 1: # log
+                    if l == "Members: \n":
+                        # end of the log text: start collecting files
+                        files = {}
+                        # drop the final character of the accumulated log
+                        log = self.recode(log[:-1])
+                        if log.isspace():
+                            log = "*** empty log message ***\n"
+                        state = 2
+                    else:
+                        log += l
+                elif state == 2:
+                    if l == "\n": # blank line ends the member list
+                        state = 0
+                        p = [self.parent[id]]
+                        # patchset "1" is treated as the root: no parents
+                        if id == "1":
+                            p = []
+                        c = commit(author=author, date=date, parents=p,
+                                   desc=log, branch=branch)
+                        self.changeset[id] = c
+                        self.files[id] = files
+                    else:
+                        # member line: strip one leading and two trailing
+                        # characters, split off the text after the last
+                        # ':' and keep the revision right of "->"
+                        file,rev = l[1:-2].rsplit(':',1)
+                        rev = rev.split("->")[1]
+                        files[file] = rev
+
+            # the last patchset recorded for each branch is a head
+            self.heads = self.lastbranch.values()
+        finally:
+            os.chdir(d)
+
+    def _connect(self):
+        """Open a CVS client/server session for self.cvsroot.
+
+        Depending on the root this either authenticates against a pserver
+        over a socket, or spawns "cvs server" locally or through the
+        remote shell named by $CVS_RSH (default "rsh").  On return
+        self.writep / self.readp are the request and response streams,
+        self.realroot is the repository path on the server and the
+        initial Root / Valid-responses / valid-requests handshake is done.
+
+        Raises NoRepo when pserver authentication is refused and aborts
+        when the server does not answer with Valid-requests.
+        """
+        root = self.cvsroot
+        conntype = None
+        user, host = None, None
+        cmd = ['cvs', 'server']
+
+        status("connecting to %s\n" % root)
+
+        if root.startswith(":pserver:"):
+            root = root[9:]
+            m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)', root)
+            if m:
+                conntype = "pserver"
+                user, passw, serv, port, root = m.groups()
+                if not user:
+                    user = "anonymous"
+
+                # Keys to look for in ~/.cvspass: rr carries the port in
+                # front of the path, rr2 is the port-less spelling, which
+                # can only apply when no explicit port was given.
+                prefix = ":pserver:" + user + "@" + serv + ":"
+                if port:
+                    rr2, port = "-", int(port)
+                else:
+                    rr2, port = prefix + root, 2401
+                rr = prefix + str(port) + root
+
+                if not passw:
+                    # fallback when no matching .cvspass entry is found
+                    passw = "A"
+                    try:
+                        pf = open(os.path.expanduser("~/.cvspass"))
+                    except IOError:
+                        # no password file: keep the fallback
+                        pf = None
+                    if pf is not None:
+                        try:
+                            for l in pf:
+                                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
+                                # optionally preceded by a "/<n> " field
+                                pm = re.match(r'(?:/\d+\s+)?(.*)',
+                                              l.rstrip("\r\n"))
+                                fields = pm.group(1).split(' ', 1)
+                                if len(fields) == 2 and fields[0] in [rr, rr2]:
+                                    passw = fields[1]
+                                    break
+                        finally:
+                            pf.close()
+
+                sck = socket.socket()
+                sck.connect((serv, port))
+                # sendall: a plain send() may transmit only part of the data
+                sck.sendall("\n".join(["BEGIN AUTH REQUEST", root, user,
+                                       passw, "END AUTH REQUEST", ""]))
+                if sck.recv(128) != "I LOVE YOU\n":
+                    raise NoRepo("CVS pserver authentication failed")
+
+                self.writep = self.readp = sck.makefile('r+')
+
+        if not conntype and root.startswith(":local:"):
+            conntype = "local"
+            root = root[7:]
+
+        if not conntype:
+            # :ext:user@host/home/user/path/to/cvsroot
+            if root.startswith(":ext:"):
+                root = root[5:]
+            m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
+            if not m:
+                # e.g. a plain absolute path: treat as a local repository
+                conntype = "local"
+            else:
+                conntype = "rsh"
+                user, host, root = m.group(1), m.group(2), m.group(3)
+
+        if conntype != "pserver":
+            if conntype == "rsh":
+                # honour $CVS_RSH, falling back to rsh when unset or empty
+                rsh = os.environ.get("CVS_RSH") or "rsh"
+                if user:
+                    cmd = [rsh, '-l', user, host] + cmd
+                else:
+                    cmd = [rsh, host] + cmd
+
+            self.writep, self.readp = os.popen2(cmd)
+
+        self.realroot = root
+
+        self.writep.write("Root %s\n" % root)
+        self.writep.write("Valid-responses ok error Valid-requests Mode"
+                          " M Mbinary E Checked-in Created Updated"
+                          " Merged Removed\n")
+        self.writep.write("valid-requests\n")
+        self.writep.flush()
+        r = self.readp.readline()
+        if not r.startswith("Valid-requests"):
+            abort("server sucks\n")
+        if "UseUnchanged" in r:
+            self.writep.write("UseUnchanged\n")
+            self.writep.flush()
+            # consume the line that follows the reply
+            r = self.readp.readline()
+
+    def getheads(self):
+        """Return the head changeset ids collected by _parse()."""
+        return self.heads
+
+    def _getfile(self, name, rev):
+        """Check out one file revision through the CVS server connection.
+
+        name is relative to self.cvsrepo, rev is a CVS revision string.
+        Returns (data, mode) where mode is "x" when the mode line sent by
+        the server contains an "x" and "" otherwise.
+
+        Raises IOError for revisions marked "(DEAD)".  Aborts on an
+        unrecognised response, or when the server says "ok" without
+        having sent a Created/Updated response carrying the file mode.
+        """
+        if rev.endswith("(DEAD)"):
+            raise IOError
+
+        args = ("-N -P -kk -r %s --" % rev).split()
+        args.append(os.path.join(self.cvsrepo, name))
+        for x in args:
+            self.writep.write("Argument %s\n" % x)
+        self.writep.write("Directory .\n%s\nco\n" % self.realroot)
+        self.writep.flush()
+
+        data = ""
+        # stays None until a Created/Updated response supplies the mode
+        mode = None
+        while 1:
+            line = self.readp.readline()
+            if line.startswith("Created ") or line.startswith("Updated "):
+                self.readp.readline() # path
+                self.readp.readline() # entries
+                mode = self.readp.readline()[:-1]
+                count = int(self.readp.readline()[:-1])
+                data = self.readp.read(count)
+            elif line.startswith(" "):
+                data += line[1:]
+            elif line.startswith("M "):
+                # message line from the server: ignored
+                pass
+            elif line.startswith("Mbinary "):
+                count = int(self.readp.readline()[:-1])
+                data = self.readp.read(count)
+            else:
+                if line == "ok\n":
+                    if mode is None:
+                        # previously an UnboundLocalError on 'mode'
+                        abort("malformed response from CVS\n")
+                    return (data, "x" in mode and "x" or "")
+                elif line.startswith("E "):
+                    warn("cvs server: %s\n" % line[2:])
+                elif line.startswith("Remove"):
+                    # NOTE(review): the second line read here is expected
+                    # to be the final "ok"; the loop then reads on --
+                    # confirm this cannot block on a real server.
+                    l = self.readp.readline()
+                    l = self.readp.readline()
+                    if l != "ok\n":
+                        abort("unknown CVS response: %s\n" % l)
+                else:
+                    abort("unknown CVS response: %s\n" % line)
+
+    def getfile(self, file, rev):
+        """Return the contents of file at rev.
+
+        The mode reported alongside the contents is remembered in
+        self.modecache under (file, rev) for a later getmode() call.
+        """
+        contents, flags = self._getfile(file, rev)
+        key = (file, rev)
+        self.modecache[key] = flags
+        return contents
+
+    def getmode(self, file, rev):
+        """Return the mode cached by getfile() for (file, rev).
+
+        Raises KeyError when that pair is not in self.modecache.
+        """
+        return self.modecache[(file, rev)]
+
+    def getchanges(self, rev):
+        """Return the sorted list of (file, revision) pairs touched by rev.
+
+        Also resets self.modecache, so modes cached for an earlier
+        changeset are forgotten.
+        """
+        self.modecache = {}
+        return sorted(self.files[rev].items())
+
+    def recode(self, text):
+        """Convert text from self.encoding to UTF-8.
+
+        Bytes that cannot be decoded are replaced rather than raising.
+        """
+        decoded = text.decode(self.encoding, "replace")
+        return decoded.encode("utf-8")
+
+    def getcommit(self, rev):
+        """Return the commit object recorded for changeset id rev."""
+        return self.changeset[rev]
+
+    def gettags(self):
+        """Return the tag name -> changeset id mapping built by _parse()."""
+        return self.tags
+
+class convert_git(converter_source):
+    """Conversion source that reads a git repository by shelling out to
+    the git command-line tools."""
+
+    def __init__(self, path):
+        """Use path, or path/.git when that directory exists.
+
+        Raises NoRepo when the chosen directory has no "objects" entry.
+        """
+        if os.path.isdir(path + "/.git"):
+            path += "/.git"
+        self.path = path
+        if not os.path.exists(path + "/objects"):
+            raise NoRepo("couldn't open GIT repo %s" % path)
+
+    def getheads(self):
+        """Return a one-element list holding the id HEAD resolves to."""
+        fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
+        return [fh.read()[:-1]]
+
+    def catfile(self, rev, type):
+        """Return the output of git-cat-file for object rev of the given
+        type.  The all-zero id raises IOError instead."""
+        if rev == "0" * 40: raise IOError()
+        fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null" % (self.path, type, rev))
+        return fh.read()
+
+    def getfile(self, name, rev):
+        """Return the blob rev; name is not needed to look it up."""
+        return self.catfile(rev, "blob")
+
+    def getmode(self, name, rev):
+        """Return the mode flag cached by getchanges() for (name, rev)."""
+        return self.modecache[(name, rev)]
+
+    def getchanges(self, version):
+        """Return a list of (file, blob id) pairs changed by version.
+
+        Resets self.modecache and refills it with "x" for mode 100755,
+        "l" for mode 120000 and "" for anything else.
+        """
+        self.modecache = {}
+        fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s" % (self.path, version))
+        changes = []
+        for l in fh:
+            # only lines with a tab (metadata <TAB> path) describe a file
+            if "\t" not in l: continue
+            m, f = l[:-1].split("\t")
+            m = m.split()
+            # second field is tested as the file mode, fourth is used as
+            # the blob id
+            h = m[3]
+            p = (m[1] == "100755")
+            s = (m[1] == "120000")
+            self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
+            changes.append((f, h))
+        return changes
+
+    def getcommit(self, version):
+        """Parse the git commit object version into a commit instance.
+
+        The committer is appended to the description.  The date is the
+        timestamp followed by an offset in seconds whose sign is the
+        opposite of git's +hhmm/-hhmm notation; when both author and
+        committer lines are present, the later line's time wins.
+        """
+        c = self.catfile(version, "commit") # read the commit hash
+        end = c.find("\n\n")
+        message = c[end+2:]
+        message = recode(message)
+        l = c[:end].splitlines()
+        parents = []
+        # the first header line is skipped; the rest are "name value"
+        for e in l[1:]:
+            n,v = e.split(" ", 1)
+            if n == "author":
+                p = v.split()
+                tm, tz = p[-2:]
+                author = " ".join(p[:-2])
+                # a bare "<address>" loses its angle brackets
+                if author.startswith("<"): author = author[1:-1]
+                author = recode(author)
+            if n == "committer":
+                p = v.split()
+                tm, tz = p[-2:]
+                committer = " ".join(p[:-2])
+                if committer.startswith("<"): committer = committer[1:-1]
+                committer = recode(committer)
+                message += "\ncommitter: %s\n" % committer
+            if n == "parent": parents.append(v)
+
+        # tz is e.g. "+0530": sign, two hour digits, two minute digits
+        tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
+        # minutes must be scaled to seconds just like the hours
+        tz = -int(tzs) * (int(tzh) * 3600 + int(tzm) * 60)
+        date = tm + " " + str(tz)
+
+        c = commit(parents=parents, date=date, author=author, desc=message)
+        return c
+
+    def gettags(self):
+        """Return a tag name -> object id mapping.
+
+        Only git-ls-remote entries under refs/tags/ that end in "^{}"
+        are used; the suffix is stripped from the name.
+        """
+        tags = {}
+        fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
+        prefix = 'refs/tags/'
+        for line in fh:
+            line = line.strip()
+            if not line.endswith("^{}"):
+                continue
+            node, tag = line.split(None, 1)
+            if not tag.startswith(prefix):
+                continue
+            tag = tag[len(prefix):-3]
+            tags[tag] = node
+
+        return tags
+
+class convert_mercurial(converter_sink):
+    """Conversion sink that writes into an existing Mercurial repository."""
+
+    def __init__(self, path):
+        """Open the repository at path; raise NoRepo when that fails."""
+        self.path = path
+        u = ui.ui()
+        try:
+            self.repo = hg.repository(u, path)
+        except:
+            # any failure to open is reported as "not this repo type"
+            raise NoRepo("couldn't open hg repo %s" % path)
+
+    def mapfile(self):
+        """Return the default revision map location, <path>/.hg/shamap."""
+        return os.path.join(self.path, ".hg", "shamap")
+
+    def getheads(self):
+        """Return the hex ids of the changelog heads."""
+        h = self.repo.changelog.heads()
+        return [ hg.hex(x) for x in h ]
+
+    def putfile(self, f, e, data):
+        """Write data to working-directory file f with flags e, marking
+        the file as added when the dirstate reports it as '?'."""
+        self.repo.wwrite(f, data, e)
+        if self.repo.dirstate.state(f) == '?':
+            self.repo.dirstate.update([f], "a")
+
+    def delfile(self, f):
+        """Remove f from the working directory; a file that cannot be
+        unlinked (e.g. already gone) is silently ignored."""
+        try:
+            os.unlink(self.repo.wjoin(f))
+            #self.repo.remove([f])
+        except OSError:
+            pass
+
+    def putcommit(self, files, parents, commit):
+        """Commit files with the given parents and commit metadata.
+
+        parents are hex ids; duplicates are dropped and the list is
+        padded with the null id up to two entries.  More than two
+        parents are folded in through a chain of extra commits.
+        Returns the hex id of the resulting tip.
+        """
+        seen = {}
+        pl = []
+        for p in parents:
+            if p not in seen:
+                pl.append(p)
+                seen[p] = 1
+        parents = pl
+
+        # rawcommit is always given two parents
+        while len(parents) < 2:
+            parents.append("0" * 40)
+        p2 = parents.pop(0)
+
+        text = commit.desc
+        extra = {}
+        try:
+            extra["branch"] = commit.branch
+        except AttributeError:
+            # sources without branch information
+            pass
+
+        while parents:
+            p1 = p2
+            p2 = parents.pop(0)
+            self.repo.rawcommit(files, text, commit.author, commit.date,
+                                hg.bin(p1), hg.bin(p2), extra=extra)
+            # any further round merges the next parent into the new tip
+            text = "(octopus merge fixup)\n"
+            p2 = hg.hex(self.repo.changelog.tip())
+
+        return p2
+
+    def puttags(self, tags):
+        """Rewrite .hgtags from the tags mapping (name -> hex id).
+
+        When the sorted tag lines differ from the current file a new
+        changeset is committed and its hex id returned; otherwise
+        nothing is written and None is returned.
+        """
+        try:
+            old = self.repo.wfile(".hgtags").read()
+            oldlines = old.splitlines(1)
+            oldlines.sort()
+        except:
+            # unreadable or missing .hgtags counts as "no tags yet"
+            oldlines = []
+
+        k = tags.keys()
+        k.sort()
+        newlines = []
+        for tag in k:
+            newlines.append("%s %s\n" % (tags[tag], tag))
+
+        newlines.sort()
+
+        if newlines != oldlines:
+            status("updating tags\n")
+            f = self.repo.wfile(".hgtags", "w")
+            f.write("".join(newlines))
+            f.close()
+            if not oldlines: self.repo.add([".hgtags"])
+            # seconds since the epoch with a zero offset; feeding
+            # gmtime() to mktime() would skew this by the local UTC offset
+            date = "%s 0" % int(time.time())
+            self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
+                                date, self.repo.changelog.tip(), hg.nullid)
+            return hg.hex(self.repo.changelog.tip())
+
+# Repository backends, in the order converter() tries them.
+converters = [convert_cvs, convert_git, convert_mercurial]
+
+def converter(path):
+    """Return an instance of the first backend that accepts path.
+
+    Each class in converters is tried in turn; one that raises NoRepo
+    is skipped.  Aborts when path is not a directory or when no
+    backend recognises it.
+    """
+    if not os.path.isdir(path):
+        abort("%s: not a directory\n" % path)
+    for backend in converters:
+        try:
+            instance = backend(path)
+        except NoRepo:
+            continue
+        else:
+            return instance
+    abort("%s: unknown repository type\n" % path)
+
+class convert(object):
+    """Copy changesets from a source repository object into a sink.
+
+    The source -> destination id correspondence is kept in self.map and
+    appended to the map file after every commit.
+    """
+
+    def __init__(self, source, dest, mapfile, opts):
+        """Remember the endpoints and load any existing map file.
+
+        source and dest are converter objects, mapfile is a path and
+        opts the option dictionary (only 'datesort' is consulted).
+        A map file that cannot be opened is treated as empty.
+        """
+
+        self.source = source
+        self.dest = dest
+        self.mapfile = mapfile
+        self.opts = opts
+        self.commitcache = {}
+
+        self.map = {}
+        try:
+            fp = open(self.mapfile)
+            try:
+                for l in fp:
+                    # split() also copes with a last line lacking "\n"
+                    sv, dv = l.split()
+                    self.map[sv] = dv
+            finally:
+                fp.close()
+        except IOError:
+            pass
+
+    def _writemap(self, rev, node):
+        """Append one "source destination" line to the map file."""
+        fp = open(self.mapfile, "a")
+        try:
+            fp.write("%s %s\n" % (rev, node))
+        finally:
+            fp.close()
+
+    def walktree(self, heads):
+        """Return {rev: [parents]} for every unconverted revision
+        reachable from heads, filling self.commitcache on the way.
+
+        Revisions without parents get no key of their own.
+        """
+        # work on a copy: the source may hand out its own list
+        visit = list(heads)
+        known = {}
+        parents = {}
+        while visit:
+            n = visit.pop(0)
+            if n in known or n in self.map: continue
+            known[n] = 1
+            self.commitcache[n] = self.source.getcommit(n)
+            cp = self.commitcache[n].parents
+            for p in cp:
+                parents.setdefault(n, []).append(p)
+                visit.append(p)
+
+        return parents
+
+    def toposort(self, parents):
+        """Return the unconverted revisions in an order where every
+        revision follows its unconverted parents.
+
+        With the 'datesort' option, revisions are additionally ordered
+        by (depth, date) where depth is the longest chain of
+        unconverted ancestors.
+        """
+        visit = list(parents)
+        seen = {}
+        children = {}
+
+        # first pass: invert the parent map into a children map
+        while visit:
+            n = visit.pop(0)
+            if n in seen: continue
+            seen[n] = 1
+            if n in parents:
+                for p in parents[n]:
+                    visit.append(p)
+                    children.setdefault(p, []).append(n)
+
+        s = []
+        removed = {}
+        visit = list(children)
+        while visit:
+            n = visit.pop(0)
+            if n in removed: continue
+            dep = 0
+            if n in parents:
+                for p in parents[n]:
+                    if p in self.map: continue
+                    if p not in removed:
+                        # we're still dependent
+                        visit.append(n)
+                        dep = 1
+                        break
+
+            if not dep:
+                # all n's parents are in the list
+                removed[n] = 1
+                if n not in self.map:
+                    s.append(n)
+                if n in children:
+                    for c in children[n]:
+                        visit.insert(0, c)
+
+        # use the options given to this instance, not a module global
+        if self.opts.get('datesort'):
+            depth = {}
+            for n in s:
+                depth[n] = 0
+                pl = [p for p in self.commitcache[n].parents if p not in self.map]
+                if pl:
+                    depth[n] = max([depth[p] for p in pl]) + 1
+
+            s = [(depth[n], self.commitcache[n].date, n) for n in s]
+            s.sort()
+            s = [e[2] for e in s]
+
+        return s
+
+    def copy(self, rev):
+        """Replay revision rev into the sink and record the new id."""
+        c = self.commitcache[rev]
+        files = self.source.getchanges(rev)
+
+        for f,v in files:
+            try:
+                data = self.source.getfile(f, v)
+            except IOError:
+                # the source has no such file revision: delete it
+                self.dest.delfile(f)
+            else:
+                e = self.source.getmode(f, v)
+                self.dest.putfile(f, e, data)
+
+        r = [self.map[v] for v in c.parents]
+        f = [f for f,v in files]
+        self.map[rev] = self.dest.putcommit(f, r, c)
+        self._writemap(rev, self.map[rev])
+
+    def convert(self):
+        """Convert every revision not yet in the map, then sync tags."""
+        status("scanning source...\n")
+        heads = self.source.getheads()
+        parents = self.walktree(heads)
+        status("sorting...\n")
+        t = self.toposort(parents)
+        num = len(t)
+        c = None
+
+        status("converting...\n")
+        for c in t:
+            num -= 1
+            desc = self.commitcache[c].desc
+            if "\n" in desc:
+                desc = desc.splitlines()[0]
+            status("%d %s\n" % (num, desc))
+            self.copy(c)
+
+        # keep only tags whose target revision has been converted
+        tags = self.source.gettags()
+        ctags = {}
+        for k in tags:
+            v = tags[k]
+            if v in self.map:
+                ctags[k] = self.map[v]
+
+        if c and ctags:
+            nrev = self.dest.puttags(ctags)
+            # write another hash correspondence to override the previous
+            # one so we don't end up with extra tag heads
+            if nrev:
+                self._writemap(c, nrev)
+
+def command(src, dest=None, mapfile=None, **opts):
+    """Convert repository src into dest.
+
+    When dest is omitted, "<src>-hg" is used and created with
+    "hg init" if it is not already a directory.  When mapfile is
+    omitted, the sink's own default is used, falling back to
+    "<dest>/map".  Aborts when src cannot be read from or dest cannot
+    be written to.
+    """
+    srcc = converter(src)
+    if not hasattr(srcc, "getcommit"):
+        abort("%s: can't read from this repo type\n" % src)
+
+    if not dest:
+        dest = src + "-hg"
+        status("assuming destination %s\n" % dest)
+        if not os.path.isdir(dest):
+            status("creating repository %s\n" % dest)
+            # NOTE(review): dest is passed to the shell unquoted
+            os.system("hg init " + dest)
+    destc = converter(dest)
+    if not hasattr(destc, "putcommit"):
+        # report the destination, which is the repo that failed the check
+        abort("%s: can't write to this repo type\n" % dest)
+
+    if not mapfile:
+        try:
+            mapfile = destc.mapfile()
+        except:
+            # sink offers no default: keep the map inside the
+            # destination directory (a path, not the sink object)
+            mapfile = os.path.join(dest, "map")
+
+    c = convert(srcc, destc, mapfile, opts)
+    c.convert()
+
+# Script entry point: parse the command line and run the conversion.
+options = [('q', 'quiet', None, 'suppress output'),
+           ('', 'datesort', None, 'try to sort changesets by date')]
+opts = {}
+# option values are read back from opts below; args holds what is left
+# and is passed on as command()'s positional arguments
+args = fancyopts.fancyopts(sys.argv[1:], options, opts)
+
+# module-level flag consulted by status()
+if opts['quiet']:
+    quiet = 1
+
+try:
+    command(*args, **opts)
+except Abort, inst:
+    # abort() messages go to stderr through warn()
+    warn(inst)
+except KeyboardInterrupt:
+    status("interrupted\n")