revlog: make _partialmatch fail fast on almost-hex inputs
author Arseniy Alekseyev <aalekseyev@janestreet.com>
Mon, 15 Aug 2022 16:12:41 +0100
changeset 49415 5fe7e9eda0f3
parent 49414 3c5d0f879404
child 49416 075a553028e2
revlog: make _partialmatch fail fast on almost-hex inputs

Before this change, resolving a revision like [0123456789^] on a large repo
can take multiple seconds because:
- hg does not realize this is a revset, so it tries various things,
  including _partialmatch(b"0123456789^")
- after the rust lookup fails, it falls back to pure hg
- pure hg takes all-but-last chars and converts them to binary, which
  *succeeds*, so it does the expensive part.
mercurial/revlog.py
--- a/mercurial/revlog.py	Tue Jul 12 01:13:56 2022 +0200
+++ b/mercurial/revlog.py	Mon Aug 15 16:12:41 2022 +0100
@@ -235,6 +235,8 @@
     b'  expected %d bytes from offset %d, data size is %d'
 )
 
+hexdigits = b'0123456789abcdefABCDEF'
+
 
 class revlog:
     """
@@ -1509,7 +1511,7 @@
                 ambiguous = True
             # fall through to slow path that filters hidden revisions
         except (AttributeError, ValueError):
-            # we are pure python, or key was too short to search radix tree
+            # we are pure python, or key is not hex
             pass
         if ambiguous:
             raise error.AmbiguousPrefixLookupError(
@@ -1523,6 +1525,11 @@
             # hex(node)[:...]
             l = len(id) // 2 * 2  # grab an even number of digits
             try:
+                # we're dropping the last digit, so let's check that it's hex,
+                # to avoid the expensive computation below if it's not
+                if len(id) % 2 > 0:
+                    if not (id[-1] in hexdigits):
+                        return None
                 prefix = bin(id[:l])
             except binascii.Error:
                 pass