hgext/fastexport.py
changeset 44280 93a05cb223da
child 44690 44aff45b556d
equal deleted inserted replaced
44279:e48a996d12bc 44280:93a05cb223da
       
     1 # Copyright 2020 Joerg Sonnenberger <joerg@bec.de>
       
     2 #
       
     3 # This software may be used and distributed according to the terms of the
       
     4 # GNU General Public License version 2 or any later version.
       
     5 """export repositories as git fast-import stream"""
       
     6 
       
     7 # The format specification for fast-import streams can be found at
       
     8 # https://git-scm.com/docs/git-fast-import#_input_format
       
     9 
       
    10 from __future__ import absolute_import
       
    11 import re
       
    12 
       
    13 from mercurial.i18n import _
       
    14 from mercurial.node import hex, nullrev
       
    15 from mercurial.utils import stringutil
       
    16 from mercurial import (
       
    17     error,
       
    18     pycompat,
       
    19     registrar,
       
    20     scmutil,
       
    21 )
       
    22 from .convert import convcmd
       
    23 
       
    24 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
       
    25 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
       
    26 # be specifying the version(s) of Mercurial they are tested with, or
       
    27 # leave the attribute unspecified.
       
    28 testedwith = b"ships-with-hg-core"
       
    29 
       
    30 cmdtable = {}
       
    31 command = registrar.command(cmdtable)
       
    32 
       
    33 GIT_PERSON_PROHIBITED = re.compile(b'[<>\n"]')
       
    34 GIT_EMAIL_PROHIBITED = re.compile(b"[<> \n]")
       
    35 
       
    36 
       
    37 def convert_to_git_user(authormap, user, rev):
       
    38     mapped_user = authormap.get(user, user)
       
    39     user_person = stringutil.person(mapped_user)
       
    40     user_email = stringutil.email(mapped_user)
       
    41     if GIT_EMAIL_PROHIBITED.match(user_email) or GIT_PERSON_PROHIBITED.match(
       
    42         user_person
       
    43     ):
       
    44         raise error.Abort(
       
    45             _(b"Unable to parse user into person and email for revision " + rev)
       
    46         )
       
    47     if user_person:
       
    48         return b'"' + user_person + b'" <' + user_email + b'>'
       
    49     else:
       
    50         return b"<" + user_email + b">"
       
    51 
       
    52 
       
    53 def convert_to_git_date(date):
       
    54     timestamp, utcoff = date
       
    55     tzsign = b"+" if utcoff < 0 else b"-"
       
    56     if utcoff % 60 != 0:
       
    57         raise error.Abort(
       
    58             _(b"UTC offset in %b is not an integer number of seconds") % (date,)
       
    59         )
       
    60     utcoff = abs(utcoff) // 60
       
    61     tzh = utcoff // 60
       
    62     tzmin = utcoff % 60
       
    63     return b"%d " % int(timestamp) + tzsign + b"%02d%02d" % (tzh, tzmin)
       
    64 
       
    65 
       
    66 def convert_to_git_ref(branch):
       
    67     # XXX filter/map depending on git restrictions
       
    68     return b"refs/heads/" + branch
       
    69 
       
    70 
       
    71 def write_data(buf, data, skip_newline):
       
    72     buf.append(b"data %d\n" % len(data))
       
    73     buf.append(data)
       
    74     if not skip_newline or data[-1:] != b"\n":
       
    75         buf.append(b"\n")
       
    76 
       
    77 
       
    78 def export_commit(ui, repo, rev, marks, authormap):
       
    79     ctx = repo[rev]
       
    80     revid = ctx.hex()
       
    81     if revid in marks:
       
    82         ui.warn(_(b"warning: revision %s already exported, skipped\n") % revid)
       
    83         return
       
    84     parents = [p for p in ctx.parents() if p.rev() != nullrev]
       
    85     for p in parents:
       
    86         if p.hex() not in marks:
       
    87             ui.warn(
       
    88                 _(b"warning: parent %s of %s has not been exported, skipped\n")
       
    89                 % (p, revid)
       
    90             )
       
    91             return
       
    92 
       
    93     # For all files modified by the commit, check if they have already
       
    94     # been exported and otherwise dump the blob with the new mark.
       
    95     for fname in ctx.files():
       
    96         if fname not in ctx:
       
    97             continue
       
    98         filectx = ctx.filectx(fname)
       
    99         filerev = hex(filectx.filenode())
       
   100         if filerev not in marks:
       
   101             mark = len(marks) + 1
       
   102             marks[filerev] = mark
       
   103             data = filectx.data()
       
   104             buf = [b"blob\n", b"mark :%d\n" % mark]
       
   105             write_data(buf, data, False)
       
   106             ui.write(*buf, keepprogressbar=True)
       
   107             del buf
       
   108 
       
   109     # Assign a mark for the current revision for references by
       
   110     # latter merge commits.
       
   111     mark = len(marks) + 1
       
   112     marks[revid] = mark
       
   113 
       
   114     ref = convert_to_git_ref(ctx.branch())
       
   115     buf = [
       
   116         b"commit %s\n" % ref,
       
   117         b"mark :%d\n" % mark,
       
   118         b"committer %s %s\n"
       
   119         % (
       
   120             convert_to_git_user(authormap, ctx.user(), revid),
       
   121             convert_to_git_date(ctx.date()),
       
   122         ),
       
   123     ]
       
   124     write_data(buf, ctx.description(), True)
       
   125     if parents:
       
   126         buf.append(b"from :%d\n" % marks[parents[0].hex()])
       
   127     if len(parents) == 2:
       
   128         buf.append(b"merge :%d\n" % marks[parents[1].hex()])
       
   129         p0ctx = repo[parents[0]]
       
   130         files = ctx.manifest().diff(p0ctx.manifest())
       
   131     else:
       
   132         files = ctx.files()
       
   133     filebuf = []
       
   134     for fname in files:
       
   135         if fname not in ctx:
       
   136             filebuf.append((fname, b"D %s\n" % fname))
       
   137         else:
       
   138             filectx = ctx.filectx(fname)
       
   139             filerev = filectx.filenode()
       
   140             fileperm = b"755" if filectx.isexec() else b"644"
       
   141             changed = b"M %s :%d %s\n" % (fileperm, marks[hex(filerev)], fname)
       
   142             filebuf.append((fname, changed))
       
   143     filebuf.sort()
       
   144     buf.extend(changed for (fname, changed) in filebuf)
       
   145     del filebuf
       
   146     buf.append(b"\n")
       
   147     ui.write(*buf, keepprogressbar=True)
       
   148     del buf
       
   149 
       
   150 
       
   151 isrev = re.compile(b"^[0-9a-f]{40}$")
       
   152 
       
   153 
       
   154 @command(
       
   155     b"fastexport",
       
   156     [
       
   157         (b"r", b"rev", [], _(b"revisions to export"), _(b"REV")),
       
   158         (b"i", b"import-marks", b"", _(b"old marks file to read"), _(b"FILE")),
       
   159         (b"e", b"export-marks", b"", _(b"new marks file to write"), _(b"FILE")),
       
   160         (
       
   161             b"A",
       
   162             b"authormap",
       
   163             b"",
       
   164             _(b"remap usernames using this file"),
       
   165             _(b"FILE"),
       
   166         ),
       
   167     ],
       
   168     _(b"[OPTION]... [REV]..."),
       
   169     helpcategory=command.CATEGORY_IMPORT_EXPORT,
       
   170 )
       
   171 def fastexport(ui, repo, *revs, **opts):
       
   172     """export repository as git fast-import stream
       
   173 
       
   174     This command lets you dump a repository as a human-readable text stream.
       
   175     It can be piped into corresponding import routines like "git fast-import".
       
   176     Incremental dumps can be created by using marks files.
       
   177     """
       
   178     opts = pycompat.byteskwargs(opts)
       
   179 
       
   180     revs += tuple(opts.get(b"rev", []))
       
   181     if not revs:
       
   182         revs = scmutil.revrange(repo, [b":"])
       
   183     else:
       
   184         revs = scmutil.revrange(repo, revs)
       
   185     if not revs:
       
   186         raise error.Abort(_(b"no revisions matched"))
       
   187     authorfile = opts.get(b"authormap")
       
   188     if authorfile:
       
   189         authormap = convcmd.readauthormap(ui, authorfile)
       
   190     else:
       
   191         authormap = {}
       
   192 
       
   193     import_marks = opts.get(b"import_marks")
       
   194     marks = {}
       
   195     if import_marks:
       
   196         with open(import_marks, "rb") as import_marks_file:
       
   197             for line in import_marks_file:
       
   198                 line = line.strip()
       
   199                 if not isrev.match(line) or line in marks:
       
   200                     raise error.Abort(_(b"Corrupted marks file"))
       
   201                 marks[line] = len(marks) + 1
       
   202 
       
   203     revs.sort()
       
   204     with ui.makeprogress(
       
   205         _(b"exporting"), unit=_(b"revisions"), total=len(revs)
       
   206     ) as progress:
       
   207         for rev in revs:
       
   208             export_commit(ui, repo, rev, marks, authormap)
       
   209             progress.increment()
       
   210 
       
   211     export_marks = opts.get(b"export_marks")
       
   212     if export_marks:
       
   213         with open(export_marks, "wb") as export_marks_file:
       
   214             output_marks = [None] * len(marks)
       
   215             for k, v in marks.items():
       
   216                 output_marks[v - 1] = k
       
   217             for k in output_marks:
       
   218                 export_marks_file.write(k + b"\n")