mercurial/httpclient/__init__.py
branchstable
changeset 37788 ed5448edcbfa
parent 37287 fb92df8b634c
parent 37787 92213f6745ed
child 37789 bfd32db06952
equal deleted inserted replaced
37287:fb92df8b634c 37788:ed5448edcbfa
     1 # Copyright 2010, Google Inc.
       
     2 # All rights reserved.
       
     3 #
       
     4 # Redistribution and use in source and binary forms, with or without
       
     5 # modification, are permitted provided that the following conditions are
       
     6 # met:
       
     7 #
       
     8 #     * Redistributions of source code must retain the above copyright
       
     9 # notice, this list of conditions and the following disclaimer.
       
    10 #     * Redistributions in binary form must reproduce the above
       
    11 # copyright notice, this list of conditions and the following disclaimer
       
    12 # in the documentation and/or other materials provided with the
       
    13 # distribution.
       
    14 #     * Neither the name of Google Inc. nor the names of its
       
    15 # contributors may be used to endorse or promote products derived from
       
    16 # this software without specific prior written permission.
       
    17 
       
    18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
       
    19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
       
    20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
       
    21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
       
    22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
       
    23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
       
    24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       
    25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       
    26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    29 """Improved HTTP/1.1 client library
       
    30 
       
    31 This library contains an HTTPConnection which is similar to the one in
       
    32 httplib, but has several additional features:
       
    33 
       
    34   * supports keepalives natively
       
    35   * uses select() to block for incoming data
       
    36   * notices when the server responds early to a request
       
    37   * implements ssl inline instead of in a different class
       
    38 """
       
    39 from __future__ import absolute_import
       
    40 
       
    41 # Many functions in this file have too many arguments.
       
    42 # pylint: disable=R0913
       
    43 import email
       
    44 import email.message
       
    45 import errno
       
    46 import inspect
       
    47 import logging
       
    48 import select
       
    49 import socket
       
    50 import ssl
       
    51 import sys
       
    52 
       
    53 try:
       
    54     import cStringIO as io
       
    55     io.StringIO
       
    56 except ImportError:
       
    57     import io
       
    58 
       
    59 try:
       
    60     import httplib
       
    61     httplib.HTTPException
       
    62 except ImportError:
       
    63     import http.client as httplib
       
    64 
       
    65 from . import (
       
    66     _readers,
       
    67 )
       
    68 
       
    69 logger = logging.getLogger(__name__)
       
    70 
       
    71 __all__ = ['HTTPConnection', 'HTTPResponse']
       
    72 
       
    73 HTTP_VER_1_0 = b'HTTP/1.0'
       
    74 HTTP_VER_1_1 = b'HTTP/1.1'
       
    75 
       
    76 OUTGOING_BUFFER_SIZE = 1 << 15
       
    77 INCOMING_BUFFER_SIZE = 1 << 20
       
    78 
       
    79 HDR_ACCEPT_ENCODING = 'accept-encoding'
       
    80 HDR_CONNECTION_CTRL = 'connection'
       
    81 HDR_CONTENT_LENGTH = 'content-length'
       
    82 HDR_XFER_ENCODING = 'transfer-encoding'
       
    83 
       
    84 XFER_ENCODING_CHUNKED = 'chunked'
       
    85 
       
    86 CONNECTION_CLOSE = 'close'
       
    87 
       
    88 EOL = b'\r\n'
       
    89 _END_HEADERS = EOL * 2
       
    90 
       
    91 # Based on some searching around, 1 second seems like a reasonable
       
    92 # default here.
       
    93 TIMEOUT_ASSUME_CONTINUE = 1
       
    94 TIMEOUT_DEFAULT = None
       
    95 
       
    96 if sys.version_info > (3, 0):
       
    97     _unicode = str
       
    98 else:
       
    99     _unicode = unicode
       
   100 
       
   101 def _ensurebytes(data):
       
   102     if not isinstance(data, (_unicode, bytes)):
       
   103         data = str(data)
       
   104     if not isinstance(data, bytes):
       
   105         try:
       
   106             return data.encode('latin-1')
       
   107         except UnicodeEncodeError as err:
       
   108             raise UnicodeEncodeError(
       
   109                 err.encoding,
       
   110                 err.object,
       
   111                 err.start,
       
   112                 err.end,
       
   113                 '%r is not valid Latin-1 Use .encode("utf-8") '
       
   114                 'if sending as utf-8 is desired.' % (
       
   115                     data[err.start:err.end],))
       
   116     return data
       
   117 
       
   118 class _CompatMessage(email.message.Message):
       
   119     """Workaround for rfc822.Message and email.message.Message API diffs."""
       
   120 
       
   121     @classmethod
       
   122     def from_string(cls, s):
       
   123         if sys.version_info > (3, 0):
       
   124             # Python 3 can't decode headers from bytes, so we have to
       
   125             # trust RFC 2616 and decode the headers as iso-8859-1
       
   126             # bytes.
       
   127             s = s.decode('iso-8859-1')
       
   128         headers = email.message_from_string(s, _class=_CompatMessage)
       
   129         # Fix multi-line headers to match httplib's behavior from
       
   130         # Python 2.x, since email.message.Message handles them in
       
   131         # slightly different ways.
       
   132         if sys.version_info < (3, 0):
       
   133             new = []
       
   134             for h, v in headers._headers:
       
   135                 if '\r\n' in v:
       
   136                     v = '\n'.join([' ' + x.lstrip() for x in v.split('\r\n')])[1:]
       
   137                 new.append((h, v))
       
   138             headers._headers = new
       
   139         return headers
       
   140 
       
   141     def getheaders(self, key):
       
   142         return self.get_all(key)
       
   143 
       
   144     def getheader(self, key, default=None):
       
   145         return self.get(key, failobj=default)
       
   146 
       
   147 
       
   148 class HTTPResponse(object):
       
   149     """Response from an HTTP server.
       
   150 
       
   151     The response will continue to load as available. If you need the
       
   152     complete response before continuing, check the .complete() method.
       
   153     """
       
   154     def __init__(self, sock, timeout, method):
       
   155         self.sock = sock
       
   156         self.method = method
       
   157         self.raw_response = b''
       
   158         self._headers_len = 0
       
   159         self.headers = None
       
   160         self.will_close = False
       
   161         self.status_line = b''
       
   162         self.status = None
       
   163         self.continued = False
       
   164         self.http_version = None
       
   165         self.reason = None
       
   166         self._reader = None
       
   167 
       
   168         self._read_location = 0
       
   169         self._eol = EOL
       
   170 
       
   171         self._timeout = timeout
       
   172 
       
   173     @property
       
   174     def _end_headers(self):
       
   175         return self._eol * 2
       
   176 
       
   177     def complete(self):
       
   178         """Returns true if this response is completely loaded.
       
   179 
       
   180         Note that if this is a connection where complete means the
       
   181         socket is closed, this will nearly always return False, even
       
   182         in cases where all the data has actually been loaded.
       
   183         """
       
   184         if self._reader:
       
   185             return self._reader.done()
       
   186 
       
   187     def _close(self):
       
   188         if self._reader is not None:
       
   189             # We're a friend of the reader class here.
       
   190             # pylint: disable=W0212
       
   191             self._reader._close()
       
   192 
       
   193     def getheader(self, header, default=None):
       
   194         return self.headers.getheader(header, default=default)
       
   195 
       
   196     def getheaders(self):
       
   197         if sys.version_info < (3, 0):
       
   198             return [(k.lower(), v) for k, v in self.headers.items()]
       
   199         # Starting in Python 3, headers aren't lowercased before being
       
   200         # returned here.
       
   201         return self.headers.items()
       
   202 
       
   203     def readline(self):
       
   204         """Read a single line from the response body.
       
   205 
       
   206         This may block until either a line ending is found or the
       
   207         response is complete.
       
   208         """
       
   209         blocks = []
       
   210         while True:
       
   211             self._reader.readto(b'\n', blocks)
       
   212 
       
   213             if blocks and blocks[-1][-1:] == b'\n' or self.complete():
       
   214                 break
       
   215 
       
   216             self._select()
       
   217 
       
   218         return b''.join(blocks)
       
   219 
       
   220     def read(self, length=None):
       
   221         """Read data from the response body."""
       
   222         # if length is None, unbounded read
       
   223         while (not self.complete()  # never select on a finished read
       
   224                and (not length  # unbounded, so we wait for complete()
       
   225                     or length > self._reader.available_data)):
       
   226             self._select()
       
   227         if not length:
       
   228             length = self._reader.available_data
       
   229         r = self._reader.read(length)
       
   230         if self.complete() and self.will_close:
       
   231             self.sock.close()
       
   232         return r
       
   233 
       
   234     def _select(self):
       
   235         r, unused_write, unused_err = select.select(
       
   236             [self.sock], [], [], self._timeout)
       
   237         if not r:
       
   238             # socket was not readable. If the response is not
       
   239             # complete, raise a timeout.
       
   240             if not self.complete():
       
   241                 logger.info('timed out with timeout of %s', self._timeout)
       
   242                 raise HTTPTimeoutException('timeout reading data')
       
   243         try:
       
   244             data = self.sock.recv(INCOMING_BUFFER_SIZE)
       
   245         except ssl.SSLError as e:
       
   246             if e.args[0] != ssl.SSL_ERROR_WANT_READ:
       
   247                 raise
       
   248             logger.debug('SSL_ERROR_WANT_READ in _select, should retry later')
       
   249             return True
       
   250         logger.debug('response read %d data during _select', len(data))
       
   251         # If the socket was readable and no data was read, that means
       
   252         # the socket was closed. Inform the reader (if any) so it can
       
   253         # raise an exception if this is an invalid situation.
       
   254         if not data:
       
   255             if self._reader:
       
   256                 # We're a friend of the reader class here.
       
   257                 # pylint: disable=W0212
       
   258                 self._reader._close()
       
   259             return False
       
   260         else:
       
   261             self._load_response(data)
       
   262             return True
       
   263 
       
   264     # This method gets replaced by _load later, which confuses pylint.
       
   265     def _load_response(self, data): # pylint: disable=E0202
       
   266         # Being here implies we're not at the end of the headers yet,
       
   267         # since at the end of this method if headers were completely
       
   268         # loaded we replace this method with the load() method of the
       
   269         # reader we created.
       
   270         self.raw_response += data
       
   271         # This is a bogus server with bad line endings
       
   272         if self._eol not in self.raw_response:
       
   273             for bad_eol in (b'\n', b'\r'):
       
   274                 if (bad_eol in self.raw_response
       
   275                     # verify that bad_eol is not the end of the incoming data
       
   276                     # as this could be a response line that just got
       
   277                     # split between \r and \n.
       
   278                     and (self.raw_response.index(bad_eol) <
       
   279                          (len(self.raw_response) - 1))):
       
   280                     logger.info('bogus line endings detected, '
       
   281                                 'using %r for EOL', bad_eol)
       
   282                     self._eol = bad_eol
       
   283                     break
       
   284         # exit early if not at end of headers
       
   285         if self._end_headers not in self.raw_response or self.headers:
       
   286             return
       
   287 
       
   288         # handle 100-continue response
       
   289         hdrs, body = self.raw_response.split(self._end_headers, 1)
       
   290         unused_http_ver, status = hdrs.split(b' ', 1)
       
   291         if status.startswith(b'100'):
       
   292             self.raw_response = body
       
   293             self.continued = True
       
   294             logger.debug('continue seen, setting body to %r', body)
       
   295             return
       
   296 
       
   297         # arriving here means we should parse response headers
       
   298         # as all headers have arrived completely
       
   299         hdrs, body = self.raw_response.split(self._end_headers, 1)
       
   300         del self.raw_response
       
   301         if self._eol in hdrs:
       
   302             self.status_line, hdrs = hdrs.split(self._eol, 1)
       
   303         else:
       
   304             self.status_line = hdrs
       
   305             hdrs = b''
       
   306         # TODO HTTP < 1.0 support
       
   307         (self.http_version, self.status,
       
   308          self.reason) = self.status_line.split(b' ', 2)
       
   309         self.status = int(self.status)
       
   310         if self._eol != EOL:
       
   311             hdrs = hdrs.replace(self._eol, b'\r\n')
       
   312         headers = _CompatMessage.from_string(hdrs)
       
   313         content_len = None
       
   314         if HDR_CONTENT_LENGTH in headers:
       
   315             content_len = int(headers[HDR_CONTENT_LENGTH])
       
   316         if self.http_version == HTTP_VER_1_0:
       
   317             self.will_close = True
       
   318         elif HDR_CONNECTION_CTRL in headers:
       
   319             self.will_close = (
       
   320                 headers[HDR_CONNECTION_CTRL].lower() == CONNECTION_CLOSE)
       
   321         if (HDR_XFER_ENCODING in headers
       
   322             and headers[HDR_XFER_ENCODING].lower() == XFER_ENCODING_CHUNKED):
       
   323             self._reader = _readers.ChunkedReader(self._eol)
       
   324             logger.debug('using a chunked reader')
       
   325         else:
       
   326             # HEAD responses are forbidden from returning a body, and
       
   327             # it's implausible for a CONNECT response to use
       
   328             # close-is-end logic for an OK response.
       
   329             if (self.method == b'HEAD' or
       
   330                 (self.method == b'CONNECT' and content_len is None)):
       
   331                 content_len = 0
       
   332             if content_len is not None:
       
   333                 logger.debug('using a content-length reader with length %d',
       
   334                              content_len)
       
   335                 self._reader = _readers.ContentLengthReader(content_len)
       
   336             else:
       
   337                 # Response body had no length specified and is not
       
   338                 # chunked, so the end of the body will only be
       
   339                 # identifiable by the termination of the socket by the
       
   340                 # server. My interpretation of the spec means that we
       
   341                 # are correct in hitting this case if
       
   342                 # transfer-encoding, content-length, and
       
   343                 # connection-control were left unspecified.
       
   344                 self._reader = _readers.CloseIsEndReader()
       
   345                 logger.debug('using a close-is-end reader')
       
   346                 self.will_close = True
       
   347 
       
   348         if body:
       
   349             # We're a friend of the reader class here.
       
   350             # pylint: disable=W0212
       
   351             self._reader._load(body)
       
   352         logger.debug('headers complete')
       
   353         self.headers = headers
       
   354         # We're a friend of the reader class here.
       
   355         # pylint: disable=W0212
       
   356         self._load_response = self._reader._load
       
   357 
       
   358 def _foldheaders(headers):
       
   359     """Given some headers, rework them so we can safely overwrite values.
       
   360 
       
   361     >>> _foldheaders({'Accept-Encoding': 'wat'})
       
   362     {'accept-encoding': ('Accept-Encoding', 'wat')}
       
   363     """
       
   364     return dict((k.lower(), (k, v)) for k, v in headers.items())
       
   365 
       
   366 try:
       
   367     inspect.signature
       
   368     def _handlesarg(func, arg):
       
   369         """ Try to determine if func accepts arg
       
   370 
       
   371         If it takes arg, return True
       
   372         If it happens to take **args, then it could do anything:
       
   373             * It could throw a different TypeError, just for fun
       
   374             * It could throw an ArgumentError or anything else
       
   375             * It could choose not to throw an Exception at all
       
   376         ... return 'unknown'
       
   377 
       
   378         Otherwise, return False
       
   379         """
       
   380         params = inspect.signature(func).parameters
       
   381         if arg in params:
       
   382             return True
       
   383         for p in params:
       
   384             if params[p].kind == inspect._ParameterKind.VAR_KEYWORD:
       
   385                 return 'unknown'
       
   386         return False
       
   387 except AttributeError:
       
   388     def _handlesarg(func, arg):
       
   389         """ Try to determine if func accepts arg
       
   390 
       
   391         If it takes arg, return True
       
   392         If it happens to take **args, then it could do anything:
       
   393             * It could throw a different TypeError, just for fun
       
   394             * It could throw an ArgumentError or anything else
       
   395             * It could choose not to throw an Exception at all
       
   396         ... return 'unknown'
       
   397 
       
   398         Otherwise, return False
       
   399         """
       
   400         spec = inspect.getargspec(func)
       
   401         if arg in spec.args:
       
   402             return True
       
   403         if spec.keywords:
       
   404             return 'unknown'
       
   405         return False
       
   406 
       
   407 class HTTPConnection(object):
       
   408     """Connection to a single http server.
       
   409 
       
   410     Supports 100-continue and keepalives natively. Uses select() for
       
   411     non-blocking socket operations.
       
   412     """
       
   413     http_version = HTTP_VER_1_1
       
   414     response_class = HTTPResponse
       
   415 
       
   416     def __init__(self, host, port=None, use_ssl=None, ssl_validator=None,
       
   417                  timeout=TIMEOUT_DEFAULT,
       
   418                  continue_timeout=TIMEOUT_ASSUME_CONTINUE,
       
   419                  proxy_hostport=None, proxy_headers=None,
       
   420                  ssl_wrap_socket=None, **ssl_opts):
       
   421         """Create a new HTTPConnection.
       
   422 
       
   423         Args:
       
   424           host: The host to which we'll connect.
       
   425           port: Optional. The port over which we'll connect. Default 80 for
       
   426                 non-ssl, 443 for ssl.
       
   427           use_ssl: Optional. Whether to use ssl. Defaults to False if port is
       
   428                    not 443, true if port is 443.
       
   429           ssl_validator: a function(socket) to validate the ssl cert
       
   430           timeout: Optional. Connection timeout, default is TIMEOUT_DEFAULT.
       
   431           continue_timeout: Optional. Timeout for waiting on an expected
       
   432                    "100 Continue" response. Default is TIMEOUT_ASSUME_CONTINUE.
       
   433           proxy_hostport: Optional. Tuple of (host, port) to use as an http
       
   434                        proxy for the connection. Default is to not use a proxy.
       
   435           proxy_headers: Optional dict of header keys and values to send to
       
   436                          a proxy when using CONNECT. For compatibility with
       
   437                          httplib, the Proxy-Authorization header may be
       
   438                          specified in headers for request(), which will clobber
       
   439                          any such header specified here if specified. Providing
       
   440                          this option and not proxy_hostport will raise an
       
   441                          ValueError.
       
   442           ssl_wrap_socket: Optional function to use for wrapping
       
   443             sockets. If unspecified, the one from the ssl module will
       
   444             be used if available, or something that's compatible with
       
   445             it if on a Python older than 2.6.
       
   446 
       
   447         Any extra keyword arguments to this function will be provided
       
   448         to the ssl_wrap_socket method. If no ssl
       
   449         """
       
   450         host = _ensurebytes(host)
       
   451         if port is None and host.count(b':') == 1 or b']:' in host:
       
   452             host, port = host.rsplit(b':', 1)
       
   453             port = int(port)
       
   454             if b'[' in host:
       
   455                 host = host[1:-1]
       
   456         if ssl_wrap_socket is not None:
       
   457             _wrap_socket = ssl_wrap_socket
       
   458         else:
       
   459             _wrap_socket = ssl.wrap_socket
       
   460         call_wrap_socket = None
       
   461         handlesubar = _handlesarg(_wrap_socket, 'server_hostname')
       
   462         if handlesubar is True:
       
   463             # supports server_hostname
       
   464             call_wrap_socket = _wrap_socket
       
   465         handlesnobar = _handlesarg(_wrap_socket, 'serverhostname')
       
   466         if handlesnobar is True and handlesubar is not True:
       
   467             # supports serverhostname
       
   468             def call_wrap_socket(sock, server_hostname=None, **ssl_opts):
       
   469                 return _wrap_socket(sock, serverhostname=server_hostname,
       
   470                                     **ssl_opts)
       
   471         if handlesubar is False and handlesnobar is False:
       
   472             # does not support either
       
   473             def call_wrap_socket(sock, server_hostname=None, **ssl_opts):
       
   474                 return _wrap_socket(sock, **ssl_opts)
       
   475         if call_wrap_socket is None:
       
   476             # we assume it takes **args
       
   477             def call_wrap_socket(sock, **ssl_opts):
       
   478                 if 'server_hostname' in ssl_opts:
       
   479                     ssl_opts['serverhostname'] = ssl_opts['server_hostname']
       
   480                 return _wrap_socket(sock, **ssl_opts)
       
   481         self._ssl_wrap_socket = call_wrap_socket
       
   482         if use_ssl is None and port is None:
       
   483             use_ssl = False
       
   484             port = 80
       
   485         elif use_ssl is None:
       
   486             use_ssl = (port == 443)
       
   487         elif port is None:
       
   488             port = (use_ssl and 443 or 80)
       
   489         self.port = port
       
   490         self.ssl = use_ssl
       
   491         self.ssl_opts = ssl_opts
       
   492         self._ssl_validator = ssl_validator
       
   493         self.host = host
       
   494         self.sock = None
       
   495         self._current_response = None
       
   496         self._current_response_taken = False
       
   497         if proxy_hostport is None:
       
   498             self._proxy_host = self._proxy_port = None
       
   499             if proxy_headers:
       
   500                 raise ValueError(
       
   501                     'proxy_headers may not be specified unless '
       
   502                     'proxy_hostport is also specified.')
       
   503             else:
       
   504                 self._proxy_headers = {}
       
   505         else:
       
   506             self._proxy_host, self._proxy_port = proxy_hostport
       
   507             self._proxy_headers = _foldheaders(proxy_headers or {})
       
   508 
       
   509         self.timeout = timeout
       
   510         self.continue_timeout = continue_timeout
       
   511 
       
   512     def _connect(self, proxy_headers):
       
   513         """Connect to the host and port specified in __init__."""
       
   514         if self.sock:
       
   515             return
       
   516         if self._proxy_host is not None:
       
   517             logger.info('Connecting to http proxy %s:%s',
       
   518                         self._proxy_host, self._proxy_port)
       
   519             sock = socket.create_connection((self._proxy_host,
       
   520                                              self._proxy_port))
       
   521             if self.ssl:
       
   522                 data = self._buildheaders(b'CONNECT', b'%s:%d' % (self.host,
       
   523                                                                   self.port),
       
   524                                           proxy_headers, HTTP_VER_1_0)
       
   525                 sock.send(data)
       
   526                 sock.setblocking(0)
       
   527                 r = self.response_class(sock, self.timeout, b'CONNECT')
       
   528                 timeout_exc = HTTPTimeoutException(
       
   529                     'Timed out waiting for CONNECT response from proxy')
       
   530                 while not r.complete():
       
   531                     try:
       
   532                         # We're a friend of the response class, so let
       
   533                         # us use the private attribute.
       
   534                         # pylint: disable=W0212
       
   535                         if not r._select():
       
   536                             if not r.complete():
       
   537                                 raise timeout_exc
       
   538                     except HTTPTimeoutException:
       
   539                         # This raise/except pattern looks goofy, but
       
   540                         # _select can raise the timeout as well as the
       
   541                         # loop body. I wish it wasn't this convoluted,
       
   542                         # but I don't have a better solution
       
   543                         # immediately handy.
       
   544                         raise timeout_exc
       
   545                 if r.status != 200:
       
   546                     raise HTTPProxyConnectFailedException(
       
   547                         'Proxy connection failed: %d %s' % (r.status,
       
   548                                                             r.read()))
       
   549                 logger.info('CONNECT (for SSL) to %s:%s via proxy succeeded.',
       
   550                             self.host, self.port)
       
   551         else:
       
   552             sock = socket.create_connection((self.host, self.port))
       
   553         if self.ssl:
       
   554             # This is the default, but in the case of proxied SSL
       
   555             # requests the proxy logic above will have cleared
       
   556             # blocking mode, so re-enable it just to be safe.
       
   557             sock.setblocking(1)
       
   558             logger.debug('wrapping socket for ssl with options %r',
       
   559                          self.ssl_opts)
       
   560             sock = self._ssl_wrap_socket(sock, server_hostname=self.host,
       
   561                                          **self.ssl_opts)
       
   562             if self._ssl_validator:
       
   563                 self._ssl_validator(sock)
       
   564         sock.setblocking(0)
       
   565         self.sock = sock
       
   566 
       
   567     def _buildheaders(self, method, path, headers, http_ver):
       
   568         if self.ssl and self.port == 443 or self.port == 80:
       
   569             # default port for protocol, so leave it out
       
   570             hdrhost = self.host
       
   571         else:
       
   572             # include nonstandard port in header
       
   573             if b':' in self.host:  # must be IPv6
       
   574                 hdrhost = b'[%s]:%d' % (self.host, self.port)
       
   575             else:
       
   576                 hdrhost = b'%s:%d' % (self.host, self.port)
       
   577         if self._proxy_host and not self.ssl:
       
   578             # When talking to a regular http proxy we must send the
       
   579             # full URI, but in all other cases we must not (although
       
   580             # technically RFC 2616 says servers must accept our
       
   581             # request if we screw up, experimentally few do that
       
   582             # correctly.)
       
   583             assert path[0:1] == b'/', 'path must start with a /'
       
   584             path = b'http://%s%s' % (hdrhost, path)
       
   585         outgoing = [b'%s %s %s%s' % (method, path, http_ver, EOL)]
       
   586         headers[b'host'] = (b'Host', hdrhost)
       
   587         headers[HDR_ACCEPT_ENCODING] = (HDR_ACCEPT_ENCODING, 'identity')
       
   588         for hdr, val in sorted((_ensurebytes(h), _ensurebytes(v))
       
   589                                for h, v in headers.values()):
       
   590             outgoing.append(b'%s: %s%s' % (hdr, val, EOL))
       
   591         outgoing.append(EOL)
       
   592         return b''.join(outgoing)
       
   593 
       
   594     def close(self):
       
   595         """Close the connection to the server.
       
   596 
       
   597         This is a no-op if the connection is already closed. The
       
   598         connection may automatically close if requested by the server
       
   599         or required by the nature of a response.
       
   600         """
       
   601         if self.sock is None:
       
   602             return
       
   603         self.sock.close()
       
   604         self.sock = None
       
   605         logger.info('closed connection to %s on %s', self.host, self.port)
       
   606 
       
   607     def busy(self):
       
   608         """Returns True if this connection object is currently in use.
       
   609 
       
   610         If a response is still pending, this will return True, even if
       
   611         the request has finished sending. In the future,
       
   612         HTTPConnection may transparently juggle multiple connections
       
   613         to the server, in which case this will be useful to detect if
       
   614         any of those connections is ready for use.
       
   615         """
       
   616         cr = self._current_response
       
   617         if cr is not None:
       
   618             if self._current_response_taken:
       
   619                 if cr.will_close:
       
   620                     self.sock = None
       
   621                     self._current_response = None
       
   622                     return False
       
   623                 elif cr.complete():
       
   624                     self._current_response = None
       
   625                     return False
       
   626             return True
       
   627         return False
       
   628 
       
    def _reconnect(self, where, pheaders):
        """Drop the current socket and connect again.

        where only appears in the log message; request() passes 'read'
        or 'write' to say which half of its loop noticed the dead
        connection. pheaders is handed unchanged to _connect().
        """
        logger.info('reconnecting during %s', where)
        self.close()
        self._connect(pheaders)
       
   633 
       
   634     def request(self, method, path, body=None, headers=None,
       
   635                 expect_continue=False):
       
   636         """Send a request to the server.
       
   637 
       
   638         For increased flexibility, this does not return the response
       
   639         object. Future versions of HTTPConnection that juggle multiple
       
   640         sockets will be able to send (for example) 5 requests all at
       
   641         once, and then let the requests arrive as data is
       
   642         available. Use the `getresponse()` method to retrieve the
       
   643         response.
       
   644         """
       
   645         if headers is None:
       
   646             headers = {}
       
   647         method = _ensurebytes(method)
       
   648         path = _ensurebytes(path)
       
   649         if self.busy():
       
   650             raise httplib.CannotSendRequest(
       
   651                 'Can not send another request before '
       
   652                 'current response is read!')
       
   653         self._current_response_taken = False
       
   654 
       
   655         logger.info('sending %s request for %s to %s on port %s',
       
   656                     method, path, self.host, self.port)
       
   657 
       
   658         hdrs = _foldheaders(headers)
       
   659         # Figure out headers that have to be computed from the request
       
   660         # body.
       
   661         chunked = False
       
   662         if body and HDR_CONTENT_LENGTH not in hdrs:
       
   663             if getattr(body, '__len__', False):
       
   664                 hdrs[HDR_CONTENT_LENGTH] = (HDR_CONTENT_LENGTH,
       
   665                                             b'%d' % len(body))
       
   666             elif getattr(body, 'read', False):
       
   667                 hdrs[HDR_XFER_ENCODING] = (HDR_XFER_ENCODING,
       
   668                                            XFER_ENCODING_CHUNKED)
       
   669                 chunked = True
       
   670             else:
       
   671                 raise BadRequestData('body has no __len__() nor read()')
       
   672         # Figure out expect-continue header
       
   673         if hdrs.get('expect', ('', ''))[1].lower() == b'100-continue':
       
   674             expect_continue = True
       
   675         elif expect_continue:
       
   676             hdrs['expect'] = (b'Expect', b'100-Continue')
       
   677         # httplib compatibility: if the user specified a
       
   678         # proxy-authorization header, that's actually intended for a
       
   679         # proxy CONNECT action, not the real request, but only if
       
   680         # we're going to use a proxy.
       
   681         pheaders = dict(self._proxy_headers)
       
   682         if self._proxy_host and self.ssl:
       
   683             pa = hdrs.pop('proxy-authorization', None)
       
   684             if pa is not None:
       
   685                 pheaders['proxy-authorization'] = pa
       
   686         # Build header data
       
   687         outgoing_headers = self._buildheaders(
       
   688             method, path, hdrs, self.http_version)
       
   689 
       
   690         # If we're reusing the underlying socket, there are some
       
   691         # conditions where we'll want to retry, so make a note of the
       
   692         # state of self.sock
       
   693         fresh_socket = self.sock is None
       
   694         self._connect(pheaders)
       
   695         response = None
       
   696         first = True
       
   697 
       
   698         while ((outgoing_headers or body)
       
   699                and not (response and response.complete())):
       
   700             select_timeout = self.timeout
       
   701             out = outgoing_headers or body
       
   702             blocking_on_continue = False
       
   703             if expect_continue and not outgoing_headers and not (
       
   704                 response and (response.headers or response.continued)):
       
   705                 logger.info(
       
   706                     'waiting up to %s seconds for'
       
   707                     ' continue response from server',
       
   708                     self.continue_timeout)
       
   709                 select_timeout = self.continue_timeout
       
   710                 blocking_on_continue = True
       
   711                 out = False
       
   712             if out:
       
   713                 w = [self.sock]
       
   714             else:
       
   715                 w = []
       
   716             r, w, x = select.select([self.sock], w, [], select_timeout)
       
   717             # if we were expecting a 100 continue and it's been long
       
   718             # enough, just go ahead and assume it's ok. This is the
       
   719             # recommended behavior from the RFC.
       
   720             if r == w == x == []:
       
   721                 if blocking_on_continue:
       
   722                     expect_continue = False
       
   723                     logger.info('no response to continue expectation from '
       
   724                                 'server, optimistically sending request body')
       
   725                 else:
       
   726                     raise HTTPTimeoutException('timeout sending data')
       
   727             was_first = first
       
   728 
       
   729             # incoming data
       
   730             if r:
       
   731                 try:
       
   732                     try:
       
   733                         data = r[0].recv(INCOMING_BUFFER_SIZE)
       
   734                     except ssl.SSLError as e:
       
   735                         if e.args[0] != ssl.SSL_ERROR_WANT_READ:
       
   736                             raise
       
   737                         logger.debug('SSL_ERROR_WANT_READ while sending '
       
   738                                      'data, retrying...')
       
   739                         continue
       
   740                     if not data:
       
   741                         logger.info('socket appears closed in read')
       
   742                         self.sock = None
       
   743                         self._current_response = None
       
   744                         if response is not None:
       
   745                             # We're a friend of the response class, so let
       
   746                             # us use the private attribute.
       
   747                             # pylint: disable=W0212
       
   748                             response._close()
       
   749                         # This if/elif ladder is a bit subtle,
       
   750                         # comments in each branch should help.
       
   751                         if response is not None and response.complete():
       
   752                             # Server responded completely and then
       
   753                             # closed the socket. We should just shut
       
   754                             # things down and let the caller get their
       
   755                             # response.
       
   756                             logger.info('Got an early response, '
       
   757                                         'aborting remaining request.')
       
   758                             break
       
   759                         elif was_first and response is None:
       
   760                             # Most likely a keepalive that got killed
       
   761                             # on the server's end. Commonly happens
       
   762                             # after getting a really large response
       
   763                             # from the server.
       
   764                             logger.info(
       
   765                                 'Connection appeared closed in read on first'
       
   766                                 ' request loop iteration, will retry.')
       
   767                             self._reconnect('read', pheaders)
       
   768                             continue
       
   769                         else:
       
   770                             # We didn't just send the first data hunk,
       
   771                             # and either have a partial response or no
       
   772                             # response at all. There's really nothing
       
   773                             # meaningful we can do here.
       
   774                             raise HTTPStateError(
       
   775                                 'Connection appears closed after '
       
   776                                 'some request data was written, but the '
       
   777                                 'response was missing or incomplete!')
       
   778                     logger.debug('read %d bytes in request()', len(data))
       
   779                     if response is None:
       
   780                         response = self.response_class(
       
   781                             r[0], self.timeout, method)
       
   782                     # We're a friend of the response class, so let us
       
   783                     # use the private attribute.
       
   784                     # pylint: disable=W0212
       
   785                     response._load_response(data)
       
   786                     # Jump to the next select() call so we load more
       
   787                     # data if the server is still sending us content.
       
   788                     continue
       
   789                 except socket.error as e:
       
   790                     if e[0] != errno.EPIPE and not was_first:
       
   791                         raise
       
   792 
       
   793             # outgoing data
       
   794             if w and out:
       
   795                 try:
       
   796                     if getattr(out, 'read', False):
       
   797                         # pylint guesses the type of out incorrectly here
       
   798                         # pylint: disable=E1103
       
   799                         data = out.read(OUTGOING_BUFFER_SIZE)
       
   800                         if not data:
       
   801                             continue
       
   802                         if len(data) < OUTGOING_BUFFER_SIZE:
       
   803                             if chunked:
       
   804                                 body = b'0' + EOL + EOL
       
   805                             else:
       
   806                                 body = None
       
   807                         if chunked:
       
   808                             # This encode is okay because we know
       
   809                             # hex() is building us only 0-9 and a-f
       
   810                             # digits.
       
   811                             asciilen = hex(len(data))[2:].encode('ascii')
       
   812                             out = asciilen + EOL + data + EOL
       
   813                         else:
       
   814                             out = data
       
   815                     amt = w[0].send(out)
       
   816                 except socket.error as e:
       
   817                     if e[0] == ssl.SSL_ERROR_WANT_WRITE and self.ssl:
       
   818                         # This means that SSL hasn't flushed its buffer into
       
   819                         # the socket yet.
       
   820                         # TODO: find a way to block on ssl flushing its buffer
       
   821                         # similar to selecting on a raw socket.
       
   822                         continue
       
   823                     if e[0] == errno.EWOULDBLOCK or e[0] == errno.EAGAIN:
       
   824                         continue
       
   825                     elif (e[0] not in (errno.ECONNRESET, errno.EPIPE)
       
   826                           and not first):
       
   827                         raise
       
   828                     self._reconnect('write', pheaders)
       
   829                     amt = self.sock.send(out)
       
   830                 logger.debug('sent %d', amt)
       
   831                 first = False
       
   832                 if out is body:
       
   833                     body = out[amt:]
       
   834                 else:
       
   835                     outgoing_headers = out[amt:]
       
   836         # End of request-sending loop.
       
   837 
       
   838         # close if the server response said to or responded before eating
       
   839         # the whole request
       
   840         if response is None:
       
   841             response = self.response_class(self.sock, self.timeout, method)
       
   842             if not fresh_socket:
       
   843                 if not response._select():
       
   844                     # This means the response failed to get any response
       
   845                     # data at all, and in all probability the socket was
       
   846                     # closed before the server even saw our request. Try
       
   847                     # the request again on a fresh socket.
       
   848                     logger.debug('response._select() failed during request().'
       
   849                                  ' Assuming request needs to be retried.')
       
   850                     self.sock = None
       
   851                     # Call this method explicitly to re-try the
       
   852                     # request. We don't use self.request() because
       
   853                     # some tools (notably Mercurial) expect to be able
       
   854                     # to subclass and redefine request(), and they
       
   855                     # don't have the same argspec as we do.
       
   856                     #
       
   857                     # TODO restructure sending of requests to avoid
       
   858                     # this recursion
       
   859                     return HTTPConnection.request(
       
   860                         self, method, path, body=body, headers=headers,
       
   861                         expect_continue=expect_continue)
       
   862         data_left = bool(outgoing_headers or body)
       
   863         if data_left:
       
   864             logger.info('stopped sending request early, '
       
   865                          'will close the socket to be safe.')
       
   866             response.will_close = True
       
   867         if response.will_close:
       
   868             # The socket will be closed by the response, so we disown
       
   869             # the socket
       
   870             self.sock = None
       
   871         self._current_response = response
       
   872 
       
   873     def getresponse(self):
       
   874         """Returns the response to the most recent request."""
       
   875         if self._current_response is None:
       
   876             raise httplib.ResponseNotReady()
       
   877         r = self._current_response
       
   878         while r.headers is None:
       
   879             # We're a friend of the response class, so let us use the
       
   880             # private attribute.
       
   881             # pylint: disable=W0212
       
   882             if not r._select() and not r.complete():
       
   883                 raise _readers.HTTPRemoteClosedError()
       
   884         if r.will_close:
       
   885             self.sock = None
       
   886             self._current_response = None
       
   887         elif r.complete():
       
   888             self._current_response = None
       
   889         else:
       
   890             self._current_response_taken = True
       
   891         return r
       
   892 
       
   893 
       
class HTTPTimeoutException(httplib.HTTPException):
    """A timeout occurred while waiting on the server.

    HTTPConnection.request() raises this when select() reports no
    activity on the socket within the timeout while request data is
    still waiting to be sent.
    """
       
   896 
       
   897 
       
class BadRequestData(httplib.HTTPException):
    """Request body object has neither __len__ nor read.

    HTTPConnection.request() raises this when it has to work out how
    to send a body and the object offers neither of those attributes.
    """
       
   900 
       
   901 
       
class HTTPProxyConnectFailedException(httplib.HTTPException):
    """Connecting to the HTTP proxy failed.

    Subclasses httplib.HTTPException, so handlers written for that
    base class catch this error as well.
    """
       
   904 
       
   905 
       
class HTTPStateError(httplib.HTTPException):
    """Invalid internal state encountered.

    HTTPConnection.request() raises this when the connection closes
    after some request data was written while the response is missing
    or incomplete.
    """
       
   908 
       
# Forward this exception type from _readers since it needs to be part
# of the public API: HTTPConnection.getresponse() raises it, so callers
# must be able to catch it under this module's name.
HTTPRemoteClosedError = _readers.HTTPRemoteClosedError
       
   912 # no-check-code