# HG changeset patch
# User Arseniy Alekseyev
# Date 1660576361 -3600
# Node ID 5fe7e9eda0f32a7f77113a8278a969e9e392bfa8
# Parent 3c5d0f87940434a74da9546dad46fd098818b830
revlog: make _partialmatch fail fast on almost-hex inputs

Before this change, resolving a revision like [0123456789^] on
a large repo can take multiple seconds because:

- hg does not realize this is a revset, so it tries various things,
  including _partialmatch(b"0123456789^")
- after the rust lookup fails, it falls back to pure hg
- pure hg takes all-but-last chars and converts them to binary,
  which *succeeds*, so it does the expensive part.

diff -r 3c5d0f879404 -r 5fe7e9eda0f3 mercurial/revlog.py
--- a/mercurial/revlog.py Tue Jul 12 01:13:56 2022 +0200
+++ b/mercurial/revlog.py Mon Aug 15 16:12:41 2022 +0100
@@ -235,6 +235,8 @@
     b' expected %d bytes from offset %d, data size is %d'
 )
 
+hexdigits = b'0123456789abcdefABCDEF'
+
 
 class revlog:
     """
@@ -1509,7 +1511,7 @@
                 ambiguous = True
             # fall through to slow path that filters hidden revisions
         except (AttributeError, ValueError):
-            # we are pure python, or key was too short to search radix tree
+            # we are pure python, or key is not hex
             pass
         if ambiguous:
             raise error.AmbiguousPrefixLookupError(
@@ -1523,6 +1525,11 @@
             # hex(node)[:...]
             l = len(id) // 2 * 2 # grab an even number of digits
             try:
+                # we're dropping the last digit, so let's check that it's hex,
+                # to avoid the expensive computation below if it's not
+                if len(id) % 2 > 0:
+                    if not (id[-1] in hexdigits):
+                        return None
                 prefix = bin(id[:l])
             except binascii.Error:
                 pass