hgext/convert/git.py
changeset 43077 687b865b95ad
parent 43076 2372284d9457
child 43117 8ff1ecfadcd1
--- a/hgext/convert/git.py	Sun Oct 06 09:45:02 2019 -0400
+++ b/hgext/convert/git.py	Sun Oct 06 09:48:39 2019 -0400
@@ -26,22 +26,22 @@
         self.url = url
 
     def hgsub(self):
-        return "%s = [git]%s" % (self.path, self.url)
+        return b"%s = [git]%s" % (self.path, self.url)
 
     def hgsubstate(self):
-        return "%s %s" % (self.node, self.path)
+        return b"%s %s" % (self.node, self.path)
 
 
 # Keys in extra fields that should not be copied if the user requests.
 bannedextrakeys = {
     # Git commit object built-ins.
-    'tree',
-    'parent',
-    'author',
-    'committer',
+    b'tree',
+    b'parent',
+    b'author',
+    b'committer',
     # Mercurial built-ins.
-    'branch',
-    'close',
+    b'branch',
+    b'close',
 }
 
 
@@ -51,7 +51,7 @@
     # both issues.
 
     def _gitcmd(self, cmd, *args, **kwargs):
-        return cmd('--git-dir=%s' % self.path, *args, **kwargs)
+        return cmd(b'--git-dir=%s' % self.path, *args, **kwargs)
 
     def gitrun0(self, *args, **kwargs):
         return self._gitcmd(self.run0, *args, **kwargs)
@@ -70,100 +70,104 @@
 
     def __init__(self, ui, repotype, path, revs=None):
         super(convert_git, self).__init__(ui, repotype, path, revs=revs)
-        common.commandline.__init__(self, ui, 'git')
+        common.commandline.__init__(self, ui, b'git')
 
         # Pass an absolute path to git to prevent from ever being interpreted
         # as a URL
         path = os.path.abspath(path)
 
-        if os.path.isdir(path + "/.git"):
-            path += "/.git"
-        if not os.path.exists(path + "/objects"):
+        if os.path.isdir(path + b"/.git"):
+            path += b"/.git"
+        if not os.path.exists(path + b"/objects"):
             raise common.NoRepo(
-                _("%s does not look like a Git repository") % path
+                _(b"%s does not look like a Git repository") % path
             )
 
         # The default value (50) is based on the default for 'git diff'.
-        similarity = ui.configint('convert', 'git.similarity')
+        similarity = ui.configint(b'convert', b'git.similarity')
         if similarity < 0 or similarity > 100:
-            raise error.Abort(_('similarity must be between 0 and 100'))
+            raise error.Abort(_(b'similarity must be between 0 and 100'))
         if similarity > 0:
-            self.simopt = ['-C%d%%' % similarity]
-            findcopiesharder = ui.configbool('convert', 'git.findcopiesharder')
+            self.simopt = [b'-C%d%%' % similarity]
+            findcopiesharder = ui.configbool(
+                b'convert', b'git.findcopiesharder'
+            )
             if findcopiesharder:
-                self.simopt.append('--find-copies-harder')
+                self.simopt.append(b'--find-copies-harder')
 
-            renamelimit = ui.configint('convert', 'git.renamelimit')
-            self.simopt.append('-l%d' % renamelimit)
+            renamelimit = ui.configint(b'convert', b'git.renamelimit')
+            self.simopt.append(b'-l%d' % renamelimit)
         else:
             self.simopt = []
 
-        common.checktool('git', 'git')
+        common.checktool(b'git', b'git')
 
         self.path = path
         self.submodules = []
 
-        self.catfilepipe = self.gitpipe('cat-file', '--batch')
+        self.catfilepipe = self.gitpipe(b'cat-file', b'--batch')
 
-        self.copyextrakeys = self.ui.configlist('convert', 'git.extrakeys')
+        self.copyextrakeys = self.ui.configlist(b'convert', b'git.extrakeys')
         banned = set(self.copyextrakeys) & bannedextrakeys
         if banned:
             raise error.Abort(
-                _('copying of extra key is forbidden: %s')
-                % _(', ').join(sorted(banned))
+                _(b'copying of extra key is forbidden: %s')
+                % _(b', ').join(sorted(banned))
             )
 
-        committeractions = self.ui.configlist('convert', 'git.committeractions')
+        committeractions = self.ui.configlist(
+            b'convert', b'git.committeractions'
+        )
 
         messagedifferent = None
         messagealways = None
         for a in committeractions:
-            if a.startswith(('messagedifferent', 'messagealways')):
+            if a.startswith((b'messagedifferent', b'messagealways')):
                 k = a
                 v = None
-                if '=' in a:
-                    k, v = a.split('=', 1)
+                if b'=' in a:
+                    k, v = a.split(b'=', 1)
 
-                if k == 'messagedifferent':
-                    messagedifferent = v or 'committer:'
-                elif k == 'messagealways':
-                    messagealways = v or 'committer:'
+                if k == b'messagedifferent':
+                    messagedifferent = v or b'committer:'
+                elif k == b'messagealways':
+                    messagealways = v or b'committer:'
 
         if messagedifferent and messagealways:
             raise error.Abort(
                 _(
-                    'committeractions cannot define both '
-                    'messagedifferent and messagealways'
+                    b'committeractions cannot define both '
+                    b'messagedifferent and messagealways'
                 )
             )
 
-        dropcommitter = 'dropcommitter' in committeractions
-        replaceauthor = 'replaceauthor' in committeractions
+        dropcommitter = b'dropcommitter' in committeractions
+        replaceauthor = b'replaceauthor' in committeractions
 
         if dropcommitter and replaceauthor:
             raise error.Abort(
                 _(
-                    'committeractions cannot define both '
-                    'dropcommitter and replaceauthor'
+                    b'committeractions cannot define both '
+                    b'dropcommitter and replaceauthor'
                 )
             )
 
         if dropcommitter and messagealways:
             raise error.Abort(
                 _(
-                    'committeractions cannot define both '
-                    'dropcommitter and messagealways'
+                    b'committeractions cannot define both '
+                    b'dropcommitter and messagealways'
                 )
             )
 
         if not messagedifferent and not messagealways:
-            messagedifferent = 'committer:'
+            messagedifferent = b'committer:'
 
         self.committeractions = {
-            'dropcommitter': dropcommitter,
-            'replaceauthor': replaceauthor,
-            'messagedifferent': messagedifferent,
-            'messagealways': messagealways,
+            b'dropcommitter': dropcommitter,
+            b'replaceauthor': replaceauthor,
+            b'messagedifferent': messagedifferent,
+            b'messagealways': messagealways,
         }
 
     def after(self):
@@ -172,35 +176,38 @@
 
     def getheads(self):
         if not self.revs:
-            output, status = self.gitrun('rev-parse', '--branches', '--remotes')
+            output, status = self.gitrun(
+                b'rev-parse', b'--branches', b'--remotes'
+            )
             heads = output.splitlines()
             if status:
-                raise error.Abort(_('cannot retrieve git heads'))
+                raise error.Abort(_(b'cannot retrieve git heads'))
         else:
             heads = []
             for rev in self.revs:
-                rawhead, ret = self.gitrun('rev-parse', '--verify', rev)
+                rawhead, ret = self.gitrun(b'rev-parse', b'--verify', rev)
                 heads.append(rawhead[:-1])
                 if ret:
-                    raise error.Abort(_('cannot retrieve git head "%s"') % rev)
+                    raise error.Abort(_(b'cannot retrieve git head "%s"') % rev)
         return heads
 
     def catfile(self, rev, ftype):
         if rev == nodemod.nullhex:
             raise IOError
-        self.catfilepipe[0].write(rev + '\n')
+        self.catfilepipe[0].write(rev + b'\n')
         self.catfilepipe[0].flush()
         info = self.catfilepipe[1].readline().split()
         if info[1] != ftype:
             raise error.Abort(
-                _('cannot read %r object at %s')
+                _(b'cannot read %r object at %s')
                 % (pycompat.bytestr(ftype), rev)
             )
         size = int(info[2])
         data = self.catfilepipe[1].read(size)
         if len(data) < size:
             raise error.Abort(
-                _('cannot read %r object at %s: unexpected size') % (ftype, rev)
+                _(b'cannot read %r object at %s: unexpected size')
+                % (ftype, rev)
             )
         # read the trailing newline
         self.catfilepipe[1].read(1)
@@ -209,14 +216,14 @@
     def getfile(self, name, rev):
         if rev == nodemod.nullhex:
             return None, None
-        if name == '.hgsub':
-            data = '\n'.join([m.hgsub() for m in self.submoditer()])
-            mode = ''
-        elif name == '.hgsubstate':
-            data = '\n'.join([m.hgsubstate() for m in self.submoditer()])
-            mode = ''
+        if name == b'.hgsub':
+            data = b'\n'.join([m.hgsub() for m in self.submoditer()])
+            mode = b''
+        elif name == b'.hgsubstate':
+            data = b'\n'.join([m.hgsubstate() for m in self.submoditer()])
+            mode = b''
         else:
-            data = self.catfile(rev, "blob")
+            data = self.catfile(rev, b"blob")
             mode = self.modecache[(name, rev)]
         return data, mode
 
@@ -236,21 +243,23 @@
         c = config.config()
         # Each item in .gitmodules starts with whitespace that cant be parsed
         c.parse(
-            '.gitmodules',
-            '\n'.join(line.strip() for line in content.split('\n')),
+            b'.gitmodules',
+            b'\n'.join(line.strip() for line in content.split(b'\n')),
         )
         for sec in c.sections():
             s = c[sec]
-            if 'url' in s and 'path' in s:
-                self.submodules.append(submodule(s['path'], '', s['url']))
+            if b'url' in s and b'path' in s:
+                self.submodules.append(submodule(s[b'path'], b'', s[b'url']))
 
     def retrievegitmodules(self, version):
-        modules, ret = self.gitrun('show', '%s:%s' % (version, '.gitmodules'))
+        modules, ret = self.gitrun(
+            b'show', b'%s:%s' % (version, b'.gitmodules')
+        )
         if ret:
             # This can happen if a file is in the repo that has permissions
             # 160000, but there is no .gitmodules file.
             self.ui.warn(
-                _("warning: cannot read submodules config file in " "%s\n")
+                _(b"warning: cannot read submodules config file in " b"%s\n")
                 % version
             )
             return
@@ -259,74 +268,76 @@
             self.parsegitmodules(modules)
         except error.ParseError:
             self.ui.warn(
-                _("warning: unable to parse .gitmodules in %s\n") % version
+                _(b"warning: unable to parse .gitmodules in %s\n") % version
             )
             return
 
         for m in self.submodules:
-            node, ret = self.gitrun('rev-parse', '%s:%s' % (version, m.path))
+            node, ret = self.gitrun(b'rev-parse', b'%s:%s' % (version, m.path))
             if ret:
                 continue
             m.node = node.strip()
 
     def getchanges(self, version, full):
         if full:
-            raise error.Abort(_("convert from git does not support --full"))
+            raise error.Abort(_(b"convert from git does not support --full"))
         self.modecache = {}
         cmd = (
-            ['diff-tree', '-z', '--root', '-m', '-r'] + self.simopt + [version]
+            [b'diff-tree', b'-z', b'--root', b'-m', b'-r']
+            + self.simopt
+            + [version]
         )
         output, status = self.gitrun(*cmd)
         if status:
-            raise error.Abort(_('cannot read changes in %s') % version)
+            raise error.Abort(_(b'cannot read changes in %s') % version)
         changes = []
         copies = {}
         seen = set()
         entry = None
         subexists = [False]
         subdeleted = [False]
-        difftree = output.split('\x00')
+        difftree = output.split(b'\x00')
         lcount = len(difftree)
         i = 0
 
-        skipsubmodules = self.ui.configbool('convert', 'git.skipsubmodules')
+        skipsubmodules = self.ui.configbool(b'convert', b'git.skipsubmodules')
 
         def add(entry, f, isdest):
             seen.add(f)
             h = entry[3]
-            p = entry[1] == "100755"
-            s = entry[1] == "120000"
-            renamesource = not isdest and entry[4][0] == 'R'
+            p = entry[1] == b"100755"
+            s = entry[1] == b"120000"
+            renamesource = not isdest and entry[4][0] == b'R'
 
-            if f == '.gitmodules':
+            if f == b'.gitmodules':
                 if skipsubmodules:
                     return
 
                 subexists[0] = True
-                if entry[4] == 'D' or renamesource:
+                if entry[4] == b'D' or renamesource:
                     subdeleted[0] = True
-                    changes.append(('.hgsub', nodemod.nullhex))
+                    changes.append((b'.hgsub', nodemod.nullhex))
                 else:
-                    changes.append(('.hgsub', ''))
-            elif entry[1] == '160000' or entry[0] == ':160000':
+                    changes.append((b'.hgsub', b''))
+            elif entry[1] == b'160000' or entry[0] == b':160000':
                 if not skipsubmodules:
                     subexists[0] = True
             else:
                 if renamesource:
                     h = nodemod.nullhex
-                self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
+                self.modecache[(f, h)] = (p and b"x") or (s and b"l") or b""
                 changes.append((f, h))
 
         while i < lcount:
             l = difftree[i]
             i += 1
             if not entry:
-                if not l.startswith(':'):
+                if not l.startswith(b':'):
                     continue
                 entry = tuple(pycompat.bytestr(p) for p in l.split())
                 continue
             f = l
-            if entry[4][0] == 'C':
+            if entry[4][0] == b'C':
                 copysrc = f
                 copydest = difftree[i]
                 i += 1
@@ -336,7 +347,7 @@
                 add(entry, f, False)
             # A file can be copied multiple times, or modified and copied
             # simultaneously. So f can be repeated even if fdest isn't.
-            if entry[4][0] == 'R':
+            if entry[4][0] == b'R':
                 # rename: next line is the destination
                 fdest = difftree[i]
                 i += 1
@@ -344,21 +355,21 @@
                     add(entry, fdest, True)
                     # .gitmodules isn't imported at all, so it being copied to
                     # and fro doesn't really make sense
-                    if f != '.gitmodules' and fdest != '.gitmodules':
+                    if f != b'.gitmodules' and fdest != b'.gitmodules':
                         copies[fdest] = f
             entry = None
 
         if subexists[0]:
             if subdeleted[0]:
-                changes.append(('.hgsubstate', nodemod.nullhex))
+                changes.append((b'.hgsubstate', nodemod.nullhex))
             else:
                 self.retrievegitmodules(version)
-                changes.append(('.hgsubstate', ''))
+                changes.append((b'.hgsubstate', b''))
         return (changes, copies, set())
 
     def getcommit(self, version):
-        c = self.catfile(version, "commit")  # read the commit hash
-        end = c.find("\n\n")
+        c = self.catfile(version, b"commit")  # read the commit hash
+        end = c.find(b"\n\n")
         message = c[end + 2 :]
         message = self.recode(message)
         l = c[:end].splitlines()
@@ -366,43 +377,43 @@
         author = committer = None
         extra = {}
         for e in l[1:]:
-            n, v = e.split(" ", 1)
-            if n == "author":
+            n, v = e.split(b" ", 1)
+            if n == b"author":
                 p = v.split()
                 tm, tz = p[-2:]
-                author = " ".join(p[:-2])
-                if author[0] == "<":
+                author = b" ".join(p[:-2])
+                if author[0] == b"<":
                     author = author[1:-1]
                 author = self.recode(author)
-            if n == "committer":
+            if n == b"committer":
                 p = v.split()
                 tm, tz = p[-2:]
-                committer = " ".join(p[:-2])
-                if committer[0] == "<":
+                committer = b" ".join(p[:-2])
+                if committer[0] == b"<":
                     committer = committer[1:-1]
                 committer = self.recode(committer)
-            if n == "parent":
+            if n == b"parent":
                 parents.append(v)
             if n in self.copyextrakeys:
                 extra[n] = v
 
-        if self.committeractions['dropcommitter']:
+        if self.committeractions[b'dropcommitter']:
             committer = None
-        elif self.committeractions['replaceauthor']:
+        elif self.committeractions[b'replaceauthor']:
             author = committer
 
         if committer:
-            messagealways = self.committeractions['messagealways']
-            messagedifferent = self.committeractions['messagedifferent']
+            messagealways = self.committeractions[b'messagealways']
+            messagedifferent = self.committeractions[b'messagedifferent']
             if messagealways:
-                message += '\n%s %s\n' % (messagealways, committer)
+                message += b'\n%s %s\n' % (messagealways, committer)
             elif messagedifferent and author != committer:
-                message += '\n%s %s\n' % (messagedifferent, committer)
+                message += b'\n%s %s\n' % (messagedifferent, committer)
 
-        tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
+        tzs, tzh, tzm = tz[-5:-4] + b"1", tz[-4:-2], tz[-2:]
         tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
-        date = tm + " " + (b"%d" % tz)
-        saverev = self.ui.configbool('convert', 'git.saverev')
+        date = tm + b" " + (b"%d" % tz)
+        saverev = self.ui.configbool(b'convert', b'git.saverev')
 
         c = common.commit(
             parents=parents,
@@ -416,27 +427,27 @@
         return c
 
     def numcommits(self):
-        output, ret = self.gitrunlines('rev-list', '--all')
+        output, ret = self.gitrunlines(b'rev-list', b'--all')
         if ret:
             raise error.Abort(
-                _('cannot retrieve number of commits in %s') % self.path
+                _(b'cannot retrieve number of commits in %s') % self.path
             )
         return len(output)
 
     def gettags(self):
         tags = {}
         alltags = {}
-        output, status = self.gitrunlines('ls-remote', '--tags', self.path)
+        output, status = self.gitrunlines(b'ls-remote', b'--tags', self.path)
 
         if status:
-            raise error.Abort(_('cannot read tags from %s') % self.path)
-        prefix = 'refs/tags/'
+            raise error.Abort(_(b'cannot read tags from %s') % self.path)
+        prefix = b'refs/tags/'
 
         # Build complete list of tags, both annotated and bare ones
         for line in output:
             line = line.strip()
-            if line.startswith("error:") or line.startswith("fatal:"):
-                raise error.Abort(_('cannot read tags from %s') % self.path)
+            if line.startswith(b"error:") or line.startswith(b"fatal:"):
+                raise error.Abort(_(b'cannot read tags from %s') % self.path)
             node, tag = line.split(None, 1)
             if not tag.startswith(prefix):
                 continue
@@ -444,10 +455,10 @@
 
         # Filter out tag objects for annotated tag refs
         for tag in alltags:
-            if tag.endswith('^{}'):
+            if tag.endswith(b'^{}'):
                 tags[tag[:-3]] = alltags[tag]
             else:
-                if tag + '^{}' in alltags:
+                if tag + b'^{}' in alltags:
                     continue
                 else:
                     tags[tag] = alltags[tag]
@@ -458,28 +469,28 @@
         changes = []
         if i is None:
             output, status = self.gitrunlines(
-                'diff-tree', '--root', '-m', '-r', version
+                b'diff-tree', b'--root', b'-m', b'-r', version
             )
             if status:
-                raise error.Abort(_('cannot read changes in %s') % version)
+                raise error.Abort(_(b'cannot read changes in %s') % version)
             for l in output:
-                if "\t" not in l:
+                if b"\t" not in l:
                     continue
-                m, f = l[:-1].split("\t")
+                m, f = l[:-1].split(b"\t")
                 changes.append(f)
         else:
             output, status = self.gitrunlines(
-                'diff-tree',
-                '--name-only',
-                '--root',
-                '-r',
+                b'diff-tree',
+                b'--name-only',
+                b'--root',
+                b'-r',
                 version,
-                '%s^%d' % (version, i + 1),
-                '--',
+                b'%s^%d' % (version, i + 1),
+                b'--',
             )
             if status:
-                raise error.Abort(_('cannot read changes in %s') % version)
-            changes = [f.rstrip('\n') for f in output]
+                raise error.Abort(_(b'cannot read changes in %s') % version)
+            changes = [f.rstrip(b'\n') for f in output]
 
         return changes
 
@@ -487,19 +498,19 @@
         bookmarks = {}
 
         # Handle local and remote branches
-        remoteprefix = self.ui.config('convert', 'git.remoteprefix')
+        remoteprefix = self.ui.config(b'convert', b'git.remoteprefix')
         reftypes = [
             # (git prefix, hg prefix)
-            ('refs/remotes/origin/', remoteprefix + '/'),
-            ('refs/heads/', ''),
+            (b'refs/remotes/origin/', remoteprefix + b'/'),
+            (b'refs/heads/', b''),
         ]
 
         exclude = {
-            'refs/remotes/origin/HEAD',
+            b'refs/remotes/origin/HEAD',
         }
 
         try:
-            output, status = self.gitrunlines('show-ref')
+            output, status = self.gitrunlines(b'show-ref')
             for line in output:
                 line = line.strip()
                 rev, name = line.split(None, 1)
@@ -507,13 +518,13 @@
                 for gitprefix, hgprefix in reftypes:
                     if not name.startswith(gitprefix) or name in exclude:
                         continue
-                    name = '%s%s' % (hgprefix, name[len(gitprefix) :])
+                    name = b'%s%s' % (hgprefix, name[len(gitprefix) :])
                     bookmarks[name] = rev
         except Exception:
             pass
 
         return bookmarks
 
-    def checkrevformat(self, revstr, mapname='splicemap'):
+    def checkrevformat(self, revstr, mapname=b'splicemap'):
         """ git revision string is a 40 byte hex """
         self.checkhexformat(revstr, mapname)