mercurial/hgweb/wsgiheaders.py
author Manuel Jacob <me@manueljacob.de>
Mon, 11 Jul 2022 01:51:20 +0200
branchstable
changeset 49378 094a5fa3cf52
parent 48946 642e31cb55f0
permissions -rw-r--r--
procutil: make stream detection in make_line_buffered more correct and strict In make_line_buffered(), we don’t want to wrap the stream if we know that lines get flushed to the underlying raw stream already. Previously, the heuristic was too optimistic. It assumed that any stream which is not an instance of io.BufferedIOBase doesn’t need wrapping. However, there are buffered streams that aren’t instances of io.BufferedIOBase, like Mercurial’s own winstdout. The new logic is different in two ways: First, only for the check, if unwraps any combination of WriteAllWrapper and winstdout. Second, it skips wrapping the stream only if it is an instance of io.RawIOBase (or already wrapped). If it is an instance of io.BufferedIOBase, it gets wrapped. In any other case, the function raises an exception. This ensures that, if an unknown stream is passed or we add another wrapper in the future, we don’t wrap the stream if it’s already line buffered or not wrap the stream if it’s not line buffered. In fact, this was already helpful during development of this change. Without it, I possibly would have forgot that WriteAllWrapper needs to be ignored for the check, leading to unnecessary wrapping if stdout is unbuffered. The alternative would have been to always wrap unknown streams. However, I don’t think that anyone would benefit from being less strict. We can expect streams from the standard library to be subclassing either io.RawIOBase or io.BufferedIOBase, so running Mercurial in the standard way should not regress by this change. Py2exe might replace sys.stdout and sys.stderr, but that currently breaks Mercurial anyway and also these streams don’t claim to be interactive, so this function is not called for them.

"""This was forked from cpython's wsgiref.headers module to work on bytes.

Header from old file showing copyright is below.

Much of this module is red-handedly pilfered from email.message in the stdlib,
so portions are Copyright (C) 2001,2002 Python Software Foundation, and were
written by Barry Warsaw.
"""

# Regular expression that matches `special' characters in parameters, the
# existence of which force quoting of the parameter value.

import re

tspecials = re.compile(br'[ ()<>@,;:\\"/\[\]?=]')


def _formatparam(param, value=None, quote=1):
    """Convenience function to format and return a key=value pair.
    This will quote the value if needed or if quote is true.
    """
    if value is not None and len(value) > 0:
        if quote or tspecials.search(value):
            value = value.replace(b'\\', b'\\\\').replace(b'"', r'\"')
            return b'%s="%s"' % (param, value)
        else:
            return b'%s=%s' % (param, value)
    else:
        return param


class Headers:
    """Manage a collection of HTTP response headers"""

    def __init__(self, headers=None):
        headers = headers if headers is not None else []
        if type(headers) is not list:
            raise TypeError(b"Headers must be a list of name/value tuples")
        self._headers = headers
        if __debug__:
            for k, v in headers:
                self._convert_string_type(k)
                self._convert_string_type(v)

    def _convert_string_type(self, value):
        """Convert/check value type."""
        if type(value) is bytes:
            return value
        raise AssertionError(
            u"Header names/values must be"
            u" of type bytes (got %s)" % repr(value)
        )

    def __len__(self):
        """Return the total number of headers, including duplicates."""
        return len(self._headers)

    def __setitem__(self, name, val):
        """Set the value of a header."""
        del self[name]
        self._headers.append(
            (self._convert_string_type(name), self._convert_string_type(val))
        )

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.
        Does *not* raise an exception if the header is missing.
        """
        name = self._convert_string_type(name.lower())
        self._headers[:] = [kv for kv in self._headers if kv[0].lower() != name]

    def __getitem__(self, name):
        """Get the first header value for 'name'
        Return None if the header is missing instead of raising an exception.
        Note that if the header appeared multiple times, the first exactly which
        occurrence gets returned is undefined.  Use getall() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __contains__(self, name):
        """Return true if the message contains the header."""
        return self.get(name) is not None

    def get_all(self, name):
        """Return a list of all the values for the named field.
        These will be sorted in the order they appeared in the original header
        list or were added to this instance, and may contain duplicates.  Any
        fields deleted and re-inserted are always appended to the header list.
        If no fields exist with the given name, returns an empty list.
        """
        name = self._convert_string_type(name.lower())
        return [kv[1] for kv in self._headers if kv[0].lower() == name]

    def get(self, name, default=None):
        """Get the first header value for 'name', or return 'default'"""
        name = self._convert_string_type(name.lower())
        for k, v in self._headers:
            if k.lower() == name:
                return v
        return default

    def keys(self):
        """Return a list of all the header field names.
        These will be sorted in the order they appeared in the original header
        list, or were added to this instance, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all header values.
        These will be sorted in the order they appeared in the original header
        list, or were added to this instance, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [v for k, v in self._headers]

    def items(self):
        """Get all the header fields and values.
        These will be sorted in the order they were in the original header
        list, or were added to this instance, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return self._headers[:]

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._headers)

    def __str__(self):
        """str() returns the formatted headers, complete with end line,
        suitable for direct HTTP transmission."""
        return b'\r\n'.join(
            [b"%s: %s" % kv for kv in self._headers] + [b'', b'']
        )

    def __bytes__(self):
        return str(self).encode('iso-8859-1')

    def setdefault(self, name, value):
        """Return first matching header value for 'name', or 'value'
        If there is no header named 'name', add a new header with name 'name'
        and value 'value'."""
        result = self.get(name)
        if result is None:
            self._headers.append(
                (
                    self._convert_string_type(name),
                    self._convert_string_type(value),
                )
            )
            return value
        else:
            return result

    def add_header(self, _name, _value, **_params):
        """Extended header setting.
        _name is the header field to add.  keyword arguments can be used to set
        additional parameters for the header field, with underscores converted
        to dashes.  Normally the parameter will be added as key="value" unless
        value is None, in which case only the key will be added.
        Example:
        h.add_header('content-disposition', 'attachment', filename='bud.gif')
        Note that unlike the corresponding 'email.message' method, this does
        *not* handle '(charset, language, value)' tuples: all values must be
        strings or None.
        """
        parts = []
        if _value is not None:
            _value = self._convert_string_type(_value)
            parts.append(_value)
        for k, v in _params.items():
            k = self._convert_string_type(k)
            if v is None:
                parts.append(k.replace(b'_', b'-'))
            else:
                v = self._convert_string_type(v)
                parts.append(_formatparam(k.replace(b'_', b'-'), v))
        self._headers.append(
            (self._convert_string_type(_name), b"; ".join(parts))
        )