# HG changeset patch # User Pulkit Goyal <7895pulkit@gmail.com> # Date 1558031669 -19800 # Node ID 5364ba1f796f62706a696d6256a82b09b85c8678 # Parent c7652f7440d96726a940c1ee7fd5a6429328b364 py3: make contrib/testparseutil.py to work on str(unicodes) contrib/check-code works on unicodes and calls functions from testparseutil.py, which before this patch used to work on bytes. This patch removes that inconsistency and makes testparseutil.py work on unicodes. This makes test-check-code.t and test-contrib-check-code.t work on Python 3 again. Differential Revision: https://phab.mercurial-scm.org/D6391 diff -r c7652f7440d9 -r 5364ba1f796f contrib/testparseutil.py --- a/contrib/testparseutil.py Fri May 17 09:36:29 2019 -0400 +++ b/contrib/testparseutil.py Fri May 17 00:04:29 2019 +0530 @@ -54,7 +54,7 @@ return s.decode(u'latin-1') def opentext(f): - return open(f, 'rb') + return open(f, 'r') else: stdin = sys.stdin stdout = sys.stdout @@ -164,14 +164,14 @@ ... self.matchfunc = matchfunc ... def startsat(self, line): ... return self.matchfunc(line) - >>> ambig1 = ambigmatcher(b'ambiguous #1', - ... lambda l: l.startswith(b' $ cat ')) - >>> ambig2 = ambigmatcher(b'ambiguous #2', - ... lambda l: l.endswith(b'<< EOF\\n')) - >>> lines = [b' $ cat > foo.py << EOF\\n'] + >>> ambig1 = ambigmatcher('ambiguous #1', + ... lambda l: l.startswith(' $ cat ')) + >>> ambig2 = ambigmatcher('ambiguous #2', + ... 
lambda l: l.endswith('<< EOF\\n')) + >>> lines = [' $ cat > foo.py << EOF\\n'] >>> errors = [] >>> matchers = [ambig1, ambig2] - >>> list(t for t in embedded(b'', lines, errors, matchers)) + >>> list(t for t in embedded('', lines, errors, matchers)) [] >>> b2s(errors) [':1: ambiguous line for "ambiguous #1", "ambiguous #2"'] @@ -181,21 +181,21 @@ ctx = filename = code = startline = None # for pyflakes for lineno, line in enumerate(lines, 1): - if not line.endswith(b'\n'): - line += b'\n' # to normalize EOF line + if not line.endswith('\n'): + line += '\n' # to normalize EOF line if matcher: # now, inside embedded code if matcher.endsat(ctx, line): codeatend = matcher.codeatend(ctx, line) if codeatend is not None: code.append(codeatend) if not matcher.ignores(ctx): - yield (filename, startline, lineno, b''.join(code)) + yield (filename, startline, lineno, ''.join(code)) matcher = None # DO NOT "continue", because line might start next fragment elif not matcher.isinside(ctx, line): # this is an error of basefile # (if matchers are implemented correctly) - errors.append(b'%s:%d: unexpected line for "%s"' + errors.append('%s:%d: unexpected line for "%s"' % (basefile, lineno, matcher.desc)) # stop extracting embedded code by current 'matcher', # because appearance of unexpected line might mean @@ -218,9 +218,9 @@ if matched: if len(matched) > 1: # this is an error of matchers, maybe - errors.append(b'%s:%d: ambiguous line for %s' % + errors.append('%s:%d: ambiguous line for %s' % (basefile, lineno, - b', '.join([b'"%s"' % m.desc + ', '.join(['"%s"' % m.desc for m, c in matched]))) # omit extracting embedded code, because choosing # arbitrary matcher from matched ones might fail to @@ -239,20 +239,20 @@ if matcher: # examine whether EOF ends embedded code, because embedded # code isn't yet ended explicitly - if matcher.endsat(ctx, b'\n'): - codeatend = matcher.codeatend(ctx, b'\n') + if matcher.endsat(ctx, '\n'): + codeatend = matcher.codeatend(ctx, '\n') if codeatend is 
not None: code.append(codeatend) if not matcher.ignores(ctx): - yield (filename, startline, lineno + 1, b''.join(code)) + yield (filename, startline, lineno + 1, ''.join(code)) else: # this is an error of basefile # (if matchers are implemented correctly) - errors.append(b'%s:%d: unexpected end of file for "%s"' + errors.append('%s:%d: unexpected end of file for "%s"' % (basefile, lineno, matcher.desc)) # heredoc limit mark to ignore embedded code at check-code.py or so -heredocignorelimit = b'NO_CHECK_EOF' +heredocignorelimit = 'NO_CHECK_EOF' # the pattern to match against cases below, and to return a limit mark # string as 'lname' group @@ -260,47 +260,47 @@ # - << LIMITMARK # - << "LIMITMARK" # - << 'LIMITMARK' -heredoclimitpat = br'\s*<<\s*(?P["\']?)(?P\w+)(?P=lquote)' +heredoclimitpat = r'\s*<<\s*(?P["\']?)(?P\w+)(?P=lquote)' class fileheredocmatcher(embeddedmatcher): """Detect "cat > FILE << LIMIT" style embedded code >>> matcher = fileheredocmatcher(b'heredoc .py file', br'[^<]+\\.py') - >>> b2s(matcher.startsat(b' $ cat > file.py << EOF\\n')) + >>> b2s(matcher.startsat(' $ cat > file.py << EOF\\n')) ('file.py', ' > EOF\\n') - >>> b2s(matcher.startsat(b' $ cat >>file.py <>> b2s(matcher.startsat(' $ cat >>file.py < EOF\\n') - >>> b2s(matcher.startsat(b' $ cat> \\x27any file.py\\x27<< "EOF"\\n')) + >>> b2s(matcher.startsat(' $ cat> \\x27any file.py\\x27<< "EOF"\\n')) ('any file.py', ' > EOF\\n') - >>> b2s(matcher.startsat(b" $ cat > file.py << 'ANYLIMIT'\\n")) + >>> b2s(matcher.startsat(" $ cat > file.py << 'ANYLIMIT'\\n")) ('file.py', ' > ANYLIMIT\\n') - >>> b2s(matcher.startsat(b' $ cat<"file.py"\\n')) + >>> b2s(matcher.startsat(' $ cat<"file.py"\\n')) ('file.py', ' > ANYLIMIT\\n') - >>> start = b' $ cat > file.py << EOF\\n' + >>> start = ' $ cat > file.py << EOF\\n' >>> ctx = matcher.startsat(start) >>> matcher.codeatstart(ctx, start) >>> b2s(matcher.filename(ctx)) 'file.py' >>> matcher.ignores(ctx) False - >>> inside = b' > foo = 1\\n' + >>> inside = ' > 
foo = 1\\n' >>> matcher.endsat(ctx, inside) False >>> matcher.isinside(ctx, inside) True >>> b2s(matcher.codeinside(ctx, inside)) 'foo = 1\\n' - >>> end = b' > EOF\\n' + >>> end = ' > EOF\\n' >>> matcher.endsat(ctx, end) True >>> matcher.codeatend(ctx, end) - >>> matcher.endsat(ctx, b' > EOFEOF\\n') + >>> matcher.endsat(ctx, ' > EOFEOF\\n') False - >>> ctx = matcher.startsat(b' $ cat > file.py << NO_CHECK_EOF\\n') + >>> ctx = matcher.startsat(' $ cat > file.py << NO_CHECK_EOF\\n') >>> matcher.ignores(ctx) True """ - _prefix = b' > ' + _prefix = ' > ' def __init__(self, desc, namepat): super(fileheredocmatcher, self).__init__(desc) @@ -312,13 +312,13 @@ # - > NAMEPAT # - > "NAMEPAT" # - > 'NAMEPAT' - namepat = (br'\s*>>?\s*(?P["\']?)(?P%s)(?P=nquote)' + namepat = (r'\s*>>?\s*(?P["\']?)(?P%s)(?P=nquote)' % namepat) self._fileres = [ # "cat > NAME << LIMIT" case - re.compile(br' \$ \s*cat' + namepat + heredoclimitpat), + re.compile(r' \$ \s*cat' + namepat + heredoclimitpat), # "cat << LIMIT > NAME" case - re.compile(br' \$ \s*cat' + heredoclimitpat + namepat), + re.compile(r' \$ \s*cat' + heredoclimitpat + namepat), ] def startsat(self, line): @@ -327,7 +327,7 @@ matched = filere.match(line) if matched: return (matched.group('name'), - b' > %s\n' % matched.group('limit')) + ' > %s\n' % matched.group('limit')) def endsat(self, ctx, line): return ctx[1] == line @@ -336,7 +336,7 @@ return line.startswith(self._prefix) def ignores(self, ctx): - return b' > %s\n' % heredocignorelimit == ctx[1] + return ' > %s\n' % heredocignorelimit == ctx[1] def filename(self, ctx): return ctx[0] @@ -357,10 +357,10 @@ """Detect ">>> code" style embedded python code >>> matcher = pydoctestmatcher() - >>> startline = b' >>> foo = 1\\n' + >>> startline = ' >>> foo = 1\\n' >>> matcher.startsat(startline) True - >>> matcher.startsat(b' ... foo = 1\\n') + >>> matcher.startsat(' ... 
foo = 1\\n') False >>> ctx = matcher.startsat(startline) >>> matcher.filename(ctx) @@ -368,45 +368,45 @@ False >>> b2s(matcher.codeatstart(ctx, startline)) 'foo = 1\\n' - >>> inside = b' >>> foo = 1\\n' + >>> inside = ' >>> foo = 1\\n' >>> matcher.endsat(ctx, inside) False >>> matcher.isinside(ctx, inside) True >>> b2s(matcher.codeinside(ctx, inside)) 'foo = 1\\n' - >>> inside = b' ... foo = 1\\n' + >>> inside = ' ... foo = 1\\n' >>> matcher.endsat(ctx, inside) False >>> matcher.isinside(ctx, inside) True >>> b2s(matcher.codeinside(ctx, inside)) 'foo = 1\\n' - >>> inside = b' expected output\\n' + >>> inside = ' expected output\\n' >>> matcher.endsat(ctx, inside) False >>> matcher.isinside(ctx, inside) True >>> b2s(matcher.codeinside(ctx, inside)) '\\n' - >>> inside = b' \\n' + >>> inside = ' \\n' >>> matcher.endsat(ctx, inside) False >>> matcher.isinside(ctx, inside) True >>> b2s(matcher.codeinside(ctx, inside)) '\\n' - >>> end = b' $ foo bar\\n' + >>> end = ' $ foo bar\\n' >>> matcher.endsat(ctx, end) True >>> matcher.codeatend(ctx, end) - >>> end = b'\\n' + >>> end = '\\n' >>> matcher.endsat(ctx, end) True >>> matcher.codeatend(ctx, end) """ - _prefix = b' >>> ' - _prefixre = re.compile(br' (>>>|\.\.\.) ') + _prefix = ' >>> ' + _prefixre = re.compile(r' (>>>|\.\.\.) ') # If a line matches against not _prefixre but _outputre, that line # is "an expected output line" (= not a part of code fragment). @@ -416,10 +416,10 @@ # run-tests.py. But "directive line inside inline python code" # should be rejected by Mercurial reviewers. Therefore, this # regexp does not matche against such directive lines. 
- _outputre = re.compile(br' $| [^$]') + _outputre = re.compile(r' $| [^$]') def __init__(self): - super(pydoctestmatcher, self).__init__(b"doctest style python code") + super(pydoctestmatcher, self).__init__("doctest style python code") def startsat(self, line): # ctx is "True" @@ -446,57 +446,57 @@ def codeinside(self, ctx, line): if self._prefixre.match(line): return line[len(self._prefix):] # strip prefix ' >>> '/' ... ' - return b'\n' # an expected output line is treated as an empty line + return '\n' # an expected output line is treated as an empty line class pyheredocmatcher(embeddedmatcher): """Detect "python << LIMIT" style embedded python code >>> matcher = pyheredocmatcher() - >>> b2s(matcher.startsat(b' $ python << EOF\\n')) + >>> b2s(matcher.startsat(' $ python << EOF\\n')) ' > EOF\\n' - >>> b2s(matcher.startsat(b' $ $PYTHON <>> b2s(matcher.startsat(' $ $PYTHON < EOF\\n' - >>> b2s(matcher.startsat(b' $ "$PYTHON"<< "EOF"\\n')) + >>> b2s(matcher.startsat(' $ "$PYTHON"<< "EOF"\\n')) ' > EOF\\n' - >>> b2s(matcher.startsat(b" $ $PYTHON << 'ANYLIMIT'\\n")) + >>> b2s(matcher.startsat(" $ $PYTHON << 'ANYLIMIT'\\n")) ' > ANYLIMIT\\n' - >>> matcher.startsat(b' $ "$PYTHON" < EOF\\n') - >>> start = b' $ python << EOF\\n' + >>> matcher.startsat(' $ "$PYTHON" < EOF\\n') + >>> start = ' $ python << EOF\\n' >>> ctx = matcher.startsat(start) >>> matcher.codeatstart(ctx, start) >>> matcher.filename(ctx) >>> matcher.ignores(ctx) False - >>> inside = b' > foo = 1\\n' + >>> inside = ' > foo = 1\\n' >>> matcher.endsat(ctx, inside) False >>> matcher.isinside(ctx, inside) True >>> b2s(matcher.codeinside(ctx, inside)) 'foo = 1\\n' - >>> end = b' > EOF\\n' + >>> end = ' > EOF\\n' >>> matcher.endsat(ctx, end) True >>> matcher.codeatend(ctx, end) - >>> matcher.endsat(ctx, b' > EOFEOF\\n') + >>> matcher.endsat(ctx, ' > EOFEOF\\n') False - >>> ctx = matcher.startsat(b' $ python << NO_CHECK_EOF\\n') + >>> ctx = matcher.startsat(' $ python << NO_CHECK_EOF\\n') >>> 
matcher.ignores(ctx) True """ - _prefix = b' > ' + _prefix = ' > ' - _startre = re.compile(br' \$ (\$PYTHON|"\$PYTHON"|python).*' + + _startre = re.compile(r' \$ (\$PYTHON|"\$PYTHON"|python).*' + heredoclimitpat) def __init__(self): - super(pyheredocmatcher, self).__init__(b"heredoc python invocation") + super(pyheredocmatcher, self).__init__("heredoc python invocation") def startsat(self, line): # ctx is END-LINE-OF-EMBEDDED-CODE matched = self._startre.match(line) if matched: - return b' > %s\n' % matched.group('limit') + return ' > %s\n' % matched.group('limit') def endsat(self, ctx, line): return ctx == line @@ -505,7 +505,7 @@ return line.startswith(self._prefix) def ignores(self, ctx): - return b' > %s\n' % heredocignorelimit == ctx + return ' > %s\n' % heredocignorelimit == ctx def filename(self, ctx): return None # no filename @@ -524,7 +524,7 @@ pyheredocmatcher(), # use '[^<]+' instead of '\S+', in order to match against # paths including whitespaces - fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py'), + fileheredocmatcher('heredoc .py file', r'[^<]+\.py'), ] def pyembedded(basefile, lines, errors): @@ -536,7 +536,7 @@ _shmatchers = [ # use '[^<]+' instead of '\S+', in order to match against # paths including whitespaces - fileheredocmatcher(b'heredoc .sh file', br'[^<]+\.sh'), + fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'), ] def shembedded(basefile, lines, errors): @@ -548,8 +548,8 @@ _hgrcmatchers = [ # use '[^<]+' instead of '\S+', in order to match against # paths including whitespaces - fileheredocmatcher(b'heredoc hgrc file', - br'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'), + fileheredocmatcher('heredoc hgrc file', + r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'), ] def hgrcembedded(basefile, lines, errors): @@ -565,14 +565,14 @@ errors = [] for name, starts, ends, code in embeddedfunc(basefile, lines, errors): if not name: - name = b'' - writeout(b"%s:%d: %s starts\n" % (basefile, starts, name)) + name = '' + writeout("%s:%d: %s 
starts\n" % (basefile, starts, name)) if opts.verbose and code: - writeout(b" |%s\n" % - b"\n |".join(l for l in code.splitlines())) - writeout(b"%s:%d: %s ends\n" % (basefile, ends, name)) + writeout(" |%s\n" % + "\n |".join(l for l in code.splitlines())) + writeout("%s:%d: %s ends\n" % (basefile, ends, name)) for e in errors: - writeerr(b"%s\n" % e) + writeerr("%s\n" % e) return len(errors) def applyembedded(args, embeddedfunc, opts): @@ -580,11 +580,11 @@ if args: for f in args: with opentext(f) as fp: - if showembedded(bytestr(f), fp, embeddedfunc, opts): + if showembedded(f, fp, embeddedfunc, opts): ret = 1 else: lines = [l for l in stdin.readlines()] - if showembedded(b'', lines, embeddedfunc, opts): + if showembedded('', lines, embeddedfunc, opts): ret = 1 return ret