sslutil: synchronize hostname matching logic with CPython stable 3.8.4
authorGregory Szorc <gregory.szorc@gmail.com>
Sun, 26 Jun 2016 19:34:48 -0700
branchstable
changeset 29452 26a5d605b868
parent 29451 676f4d0e3a7b
child 29453 e26385329c53
sslutil: synchronize hostname matching logic with CPython sslutil contains its own hostname matching logic. CPython has code for the same intent. However, it is only available to Python 2.7.9+ (or distributions that have backported 2.7.9's ssl module improvements). This patch effectively imports CPython's hostname matching code from its ssl.py into sslutil.py. The hostname matching code itself is pretty similar. However, the DNS name matching code is much more robust and spec conformant. As the test changes show, this changes some behavior around wildcard handling and IDNA matching. The new behavior allows wildcards in the middle of words (e.g. 'f*.com' matches 'foo.com') This is spec compliant according to RFC 6125 Section 6.5.3 item 3. There is one test where the matcher is more strict. Before, '*.a.com' matched '.a.com'. Now it doesn't match. Strictly speaking this is a security vulnerability.
mercurial/sslutil.py
tests/test-url.py
--- a/mercurial/sslutil.py	Sun Jun 26 19:16:54 2016 -0700
+++ b/mercurial/sslutil.py	Sun Jun 26 19:34:48 2016 -0700
@@ -10,6 +10,7 @@
 from __future__ import absolute_import
 
 import os
+import re
 import ssl
 import sys
 
@@ -167,6 +168,57 @@
         raise error.Abort(_('ssl connection failed'))
     return sslsocket
 
+class wildcarderror(Exception):
+    """Represents an error parsing wildcards in DNS name."""
+
+def _dnsnamematch(dn, hostname, maxwildcards=1):
+    """Match DNS names according RFC 6125 section 6.4.3.
+
+    This code is effectively copied from CPython's ssl._dnsname_match.
+
+    Returns a bool indicating whether the expected hostname matches
+    the value in ``dn``.
+    """
+    pats = []
+    if not dn:
+        return False
+
+    pieces = dn.split(r'.')
+    leftmost = pieces[0]
+    remainder = pieces[1:]
+    wildcards = leftmost.count('*')
+    if wildcards > maxwildcards:
+        raise wildcarderror(
+            _('too many wildcards in certificate DNS name: %s') % dn)
+
+    # speed up common case w/o wildcards
+    if not wildcards:
+        return dn.lower() == hostname.lower()
+
+    # RFC 6125, section 6.4.3, subitem 1.
+    # The client SHOULD NOT attempt to match a presented identifier in which
+    # the wildcard character comprises a label other than the left-most label.
+    if leftmost == '*':
+        # When '*' is a fragment by itself, it matches a non-empty dotless
+        # fragment.
+        pats.append('[^.]+')
+    elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
+        # RFC 6125, section 6.4.3, subitem 3.
+        # The client SHOULD NOT attempt to match a presented identifier
+        # where the wildcard character is embedded within an A-label or
+        # U-label of an internationalized domain name.
+        pats.append(re.escape(leftmost))
+    else:
+        # Otherwise, '*' matches any dotless string, e.g. www*
+        pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
+
+    # add the remaining fragments, ignore any wildcards
+    for frag in remainder:
+        pats.append(re.escape(frag))
+
+    pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
+    return pat.match(hostname) is not None
+
 def _verifycert(cert, hostname):
     '''Verify that cert (in socket.getpeercert() format) matches hostname.
     CRLs is not handled.
@@ -175,33 +227,46 @@
     '''
     if not cert:
         return _('no certificate received')
-    dnsname = hostname.lower()
-    def matchdnsname(certname):
-        return (certname == dnsname or
-                '.' in dnsname and certname == '*.' + dnsname.split('.', 1)[1])
 
+    dnsnames = []
     san = cert.get('subjectAltName', [])
-    if san:
-        certnames = [value.lower() for key, value in san if key == 'DNS']
-        for name in certnames:
-            if matchdnsname(name):
-                return None
-        if certnames:
-            return _('certificate is for %s') % ', '.join(certnames)
+    for key, value in san:
+        if key == 'DNS':
+            try:
+                if _dnsnamematch(value, hostname):
+                    return
+            except wildcarderror as e:
+                return e.message
+
+            dnsnames.append(value)
 
-    # subject is only checked when subjectAltName is empty
-    for s in cert.get('subject', []):
-        key, value = s[0]
-        if key == 'commonName':
-            try:
-                # 'subject' entries are unicode
-                certname = value.lower().encode('ascii')
-            except UnicodeEncodeError:
-                return _('IDN in certificate not supported')
-            if matchdnsname(certname):
-                return None
-            return _('certificate is for %s') % certname
-    return _('no commonName or subjectAltName found in certificate')
+    if not dnsnames:
+        # The subject is only checked when there is no DNS in subjectAltName.
+        for sub in cert.get('subject', []):
+            for key, value in sub:
+                # According to RFC 2818 the most specific Common Name must
+                # be used.
+                if key == 'commonName':
+                    # 'subject' entries are unicide.
+                    try:
+                        value = value.encode('ascii')
+                    except UnicodeEncodeError:
+                        return _('IDN in certificate not supported')
+
+                    try:
+                        if _dnsnamematch(value, hostname):
+                            return
+                    except wildcarderror as e:
+                        return e.message
+
+                    dnsnames.append(value)
+
+    if len(dnsnames) > 1:
+        return _('certificate is for %s') % ', '.join(dnsnames)
+    elif len(dnsnames) == 1:
+        return _('certificate is for %s') % dnsnames[0]
+    else:
+        return _('no commonName or subjectAltName found in certificate')
 
 
 # CERT_REQUIRED means fetch the cert from the server all the time AND
--- a/tests/test-url.py	Sun Jun 26 19:16:54 2016 -0700
+++ b/tests/test-url.py	Sun Jun 26 19:34:48 2016 -0700
@@ -51,8 +51,7 @@
 # Avoid some pitfalls
 check(_verifycert(cert('*.foo'), 'foo'),
       'certificate is for *.foo')
-check(_verifycert(cert('*o'), 'foo'),
-      'certificate is for *o')
+check(_verifycert(cert('*o'), 'foo'), None)
 
 check(_verifycert({'subject': ()},
                   'example.com'),
@@ -82,13 +81,12 @@
       'certificate is for *.a.com')
 check(_verifycert(cert('*.a.com'), 'Xa.com'),
       'certificate is for *.a.com')
-check(_verifycert(cert('*.a.com'), '.a.com'), None)
+check(_verifycert(cert('*.a.com'), '.a.com'),
+      'certificate is for *.a.com')
 
 # only match one left-most wildcard
-check(_verifycert(cert('f*.com'), 'foo.com'),
-      'certificate is for f*.com')
-check(_verifycert(cert('f*.com'), 'f.com'),
-      'certificate is for f*.com')
+check(_verifycert(cert('f*.com'), 'foo.com'), None)
+check(_verifycert(cert('f*.com'), 'f.com'), None)
 check(_verifycert(cert('f*.com'), 'bar.com'),
       'certificate is for f*.com')
 check(_verifycert(cert('f*.com'), 'foo.a.com'),
@@ -136,10 +134,10 @@
 idna = u'www*.pythön.org'.encode('idna').decode('ascii')
 check(_verifycert(cert(idna),
                   u'www.pythön.org'.encode('idna').decode('ascii')),
-      'certificate is for www*.xn--pythn-mua.org')
+      None)
 check(_verifycert(cert(idna),
                   u'www1.pythön.org'.encode('idna').decode('ascii')),
-      'certificate is for www*.xn--pythn-mua.org')
+      None)
 check(_verifycert(cert(idna),
                   u'ftp.pythön.org'.encode('idna').decode('ascii')),
       'certificate is for www*.xn--pythn-mua.org')
@@ -229,11 +227,12 @@
 # avoid denials of service by refusing more than one
 # wildcard per fragment.
 check(_verifycert({'subject': (((u'commonName', u'a*b.com'),),)},
-                  'axxb.com'), 'certificate is for a*b.com')
+                  'axxb.com'), None)
 check(_verifycert({'subject': (((u'commonName', u'a*b.co*'),),)},
                   'axxb.com'), 'certificate is for a*b.co*')
 check(_verifycert({'subject': (((u'commonName', u'a*b*.com'),),)},
-                  'axxbxxc.com'), 'certificate is for a*b*.com')
+                  'axxbxxc.com'),
+      'too many wildcards in certificate DNS name: a*b*.com')
 
 def test_url():
     """