213 return self.do_open(HTTPConnection, req) |
213 return self.do_open(HTTPConnection, req) |
214 |
214 |
215 def do_open(self, http_class, req): |
215 def do_open(self, http_class, req): |
216 host = urllibcompat.gethost(req) |
216 host = urllibcompat.gethost(req) |
217 if not host: |
217 if not host: |
218 raise urlerr.urlerror('no host given') |
218 raise urlerr.urlerror(b'no host given') |
219 |
219 |
220 try: |
220 try: |
221 h = self._cm.get_ready_conn(host) |
221 h = self._cm.get_ready_conn(host) |
222 while h: |
222 while h: |
223 r = self._reuse_connection(h, req, host) |
223 r = self._reuse_connection(h, req, host) |
235 else: |
235 else: |
236 # no (working) free connections were found. Create a new one. |
236 # no (working) free connections were found. Create a new one. |
237 h = http_class(host, timeout=self._timeout) |
237 h = http_class(host, timeout=self._timeout) |
238 if DEBUG: |
238 if DEBUG: |
239 DEBUG.info( |
239 DEBUG.info( |
240 "creating new connection to %s (%d)", host, id(h) |
240 b"creating new connection to %s (%d)", host, id(h) |
241 ) |
241 ) |
242 self._cm.add(host, h, False) |
242 self._cm.add(host, h, False) |
243 self._start_transaction(h, req) |
243 self._start_transaction(h, req) |
244 r = h.getresponse() |
244 r = h.getresponse() |
245 # The string form of BadStatusLine is the status line. Add some context |
245 # The string form of BadStatusLine is the status line. Add some context |
246 # to make the error message slightly more useful. |
246 # to make the error message slightly more useful. |
247 except httplib.BadStatusLine as err: |
247 except httplib.BadStatusLine as err: |
248 raise urlerr.urlerror( |
248 raise urlerr.urlerror( |
249 _('bad HTTP status line: %s') % pycompat.sysbytes(err.line) |
249 _(b'bad HTTP status line: %s') % pycompat.sysbytes(err.line) |
250 ) |
250 ) |
251 except (socket.error, httplib.HTTPException) as err: |
251 except (socket.error, httplib.HTTPException) as err: |
252 raise urlerr.urlerror(err) |
252 raise urlerr.urlerror(err) |
253 |
253 |
254 # If not a persistent connection, don't try to reuse it. Look |
254 # If not a persistent connection, don't try to reuse it. Look |
256 # attribute, and in that case always close the connection. |
256 # attribute, and in that case always close the connection. |
257 if getattr(r, r'will_close', True): |
257 if getattr(r, r'will_close', True): |
258 self._cm.remove(h) |
258 self._cm.remove(h) |
259 |
259 |
260 if DEBUG: |
260 if DEBUG: |
261 DEBUG.info("STATUS: %s, %s", r.status, r.reason) |
261 DEBUG.info(b"STATUS: %s, %s", r.status, r.reason) |
262 r._handler = self |
262 r._handler = self |
263 r._host = host |
263 r._host = host |
264 r._url = req.get_full_url() |
264 r._url = req.get_full_url() |
265 r._connection = h |
265 r._connection = h |
266 r.code = r.status |
266 r.code = r.status |
308 # bad header back. This is most likely to happen if |
308 # bad header back. This is most likely to happen if |
309 # the socket has been closed by the server since we |
309 # the socket has been closed by the server since we |
310 # last used the connection. |
310 # last used the connection. |
311 if DEBUG: |
311 if DEBUG: |
312 DEBUG.info( |
312 DEBUG.info( |
313 "failed to re-use connection to %s (%d)", host, id(h) |
313 b"failed to re-use connection to %s (%d)", host, id(h) |
314 ) |
314 ) |
315 r = None |
315 r = None |
316 else: |
316 else: |
317 if DEBUG: |
317 if DEBUG: |
318 DEBUG.info("re-using connection to %s (%d)", host, id(h)) |
318 DEBUG.info(b"re-using connection to %s (%d)", host, id(h)) |
319 |
319 |
320 return r |
320 return r |
321 |
321 |
322 def _start_transaction(self, h, req): |
322 def _start_transaction(self, h, req): |
323 oldbytescount = getattr(h, 'sentbytescount', 0) |
323 oldbytescount = getattr(h, 'sentbytescount', 0) |
406 self, sock, debuglevel=debuglevel, method=method, **extrakw |
406 self, sock, debuglevel=debuglevel, method=method, **extrakw |
407 ) |
407 ) |
408 self.fileno = sock.fileno |
408 self.fileno = sock.fileno |
409 self.code = None |
409 self.code = None |
410 self.receivedbytescount = 0 |
410 self.receivedbytescount = 0 |
411 self._rbuf = '' |
411 self._rbuf = b'' |
412 self._rbufsize = 8096 |
412 self._rbufsize = 8096 |
413 self._handler = None # inserted by the handler later |
413 self._handler = None # inserted by the handler later |
414 self._host = None # (same) |
414 self._host = None # (same) |
415 self._url = None # (same) |
415 self._url = None # (same) |
416 self._connection = None # (same) |
416 self._connection = None # (same) |
482 parts = [] |
482 parts = [] |
483 |
483 |
484 while True: |
484 while True: |
485 if chunk_left is None: |
485 if chunk_left is None: |
486 line = self.fp.readline() |
486 line = self.fp.readline() |
487 i = line.find(';') |
487 i = line.find(b';') |
488 if i >= 0: |
488 if i >= 0: |
489 line = line[:i] # strip chunk-extensions |
489 line = line[:i] # strip chunk-extensions |
490 try: |
490 try: |
491 chunk_left = int(line, 16) |
491 chunk_left = int(line, 16) |
492 except ValueError: |
492 except ValueError: |
493 # close the connection as protocol synchronization is |
493 # close the connection as protocol synchronization is |
494 # probably lost |
494 # probably lost |
495 self.close() |
495 self.close() |
496 raise httplib.IncompleteRead(''.join(parts)) |
496 raise httplib.IncompleteRead(b''.join(parts)) |
497 if chunk_left == 0: |
497 if chunk_left == 0: |
498 break |
498 break |
499 if amt is None: |
499 if amt is None: |
500 parts.append(self._safe_read(chunk_left)) |
500 parts.append(self._safe_read(chunk_left)) |
501 elif amt < chunk_left: |
501 elif amt < chunk_left: |
502 parts.append(self._safe_read(amt)) |
502 parts.append(self._safe_read(amt)) |
503 self.chunk_left = chunk_left - amt |
503 self.chunk_left = chunk_left - amt |
504 return ''.join(parts) |
504 return b''.join(parts) |
505 elif amt == chunk_left: |
505 elif amt == chunk_left: |
506 parts.append(self._safe_read(amt)) |
506 parts.append(self._safe_read(amt)) |
507 self._safe_read(2) # toss the CRLF at the end of the chunk |
507 self._safe_read(2) # toss the CRLF at the end of the chunk |
508 self.chunk_left = None |
508 self.chunk_left = None |
509 return ''.join(parts) |
509 return b''.join(parts) |
510 else: |
510 else: |
511 parts.append(self._safe_read(chunk_left)) |
511 parts.append(self._safe_read(chunk_left)) |
512 amt -= chunk_left |
512 amt -= chunk_left |
513 |
513 |
514 # we read the whole chunk, get another |
514 # we read the whole chunk, get another |
521 line = self.fp.readline() |
521 line = self.fp.readline() |
522 if not line: |
522 if not line: |
523 # a vanishingly small number of sites EOF without |
523 # a vanishingly small number of sites EOF without |
524 # sending the trailer |
524 # sending the trailer |
525 break |
525 break |
526 if line == '\r\n': |
526 if line == b'\r\n': |
527 break |
527 break |
528 |
528 |
529 # we read everything; close the "file" |
529 # we read everything; close the "file" |
530 self.close() |
530 self.close() |
531 |
531 |
532 return ''.join(parts) |
532 return b''.join(parts) |
533 |
533 |
534 def readline(self): |
534 def readline(self): |
535 # Fast path for a line is already available in read buffer. |
535 # Fast path for a line is already available in read buffer. |
536 i = self._rbuf.find('\n') |
536 i = self._rbuf.find(b'\n') |
537 if i >= 0: |
537 if i >= 0: |
538 i += 1 |
538 i += 1 |
539 line = self._rbuf[:i] |
539 line = self._rbuf[:i] |
540 self._rbuf = self._rbuf[i:] |
540 self._rbuf = self._rbuf[i:] |
541 return line |
541 return line |
555 self._handler.parent.receivedbytescount += len(new) |
555 self._handler.parent.receivedbytescount += len(new) |
556 except AttributeError: |
556 except AttributeError: |
557 pass |
557 pass |
558 |
558 |
559 chunks.append(new) |
559 chunks.append(new) |
560 i = new.find('\n') |
560 i = new.find(b'\n') |
561 if i >= 0: |
561 if i >= 0: |
562 break |
562 break |
563 |
563 |
564 # We either have exhausted the stream or have a newline in chunks[-1]. |
564 # We either have exhausted the stream or have a newline in chunks[-1]. |
565 |
565 |
566 # EOF |
566 # EOF |
567 if i == -1: |
567 if i == -1: |
568 self._rbuf = '' |
568 self._rbuf = b'' |
569 return ''.join(chunks) |
569 return b''.join(chunks) |
570 |
570 |
571 i += 1 |
571 i += 1 |
572 self._rbuf = chunks[-1][i:] |
572 self._rbuf = chunks[-1][i:] |
573 chunks[-1] = chunks[-1][:i] |
573 chunks[-1] = chunks[-1][:i] |
574 return ''.join(chunks) |
574 return b''.join(chunks) |
575 |
575 |
576 def readlines(self, sizehint=0): |
576 def readlines(self, sizehint=0): |
577 total = 0 |
577 total = 0 |
578 list = [] |
578 list = [] |
579 while True: |
579 while True: |
640 # the socket. we want to reconnect when somebody tries to send again. |
640 # the socket. we want to reconnect when somebody tries to send again. |
641 # |
641 # |
642 # NOTE: we DO propagate the error, though, because we cannot simply |
642 # NOTE: we DO propagate the error, though, because we cannot simply |
643 # ignore the error... the caller will know if they can retry. |
643 # ignore the error... the caller will know if they can retry. |
644 if self.debuglevel > 0: |
644 if self.debuglevel > 0: |
645 print("send:", repr(str)) |
645 print(b"send:", repr(str)) |
646 try: |
646 try: |
647 blocksize = 8192 |
647 blocksize = 8192 |
648 read = getattr(str, 'read', None) |
648 read = getattr(str, 'read', None) |
649 if read is not None: |
649 if read is not None: |
650 if self.debuglevel > 0: |
650 if self.debuglevel > 0: |
651 print("sending a read()able") |
651 print(b"sending a read()able") |
652 data = read(blocksize) |
652 data = read(blocksize) |
653 while data: |
653 while data: |
654 self.sock.sendall(data) |
654 self.sock.sendall(data) |
655 self.sentbytescount += len(data) |
655 self.sentbytescount += len(data) |
656 data = read(blocksize) |
656 data = read(blocksize) |
708 ######################################################################### |
708 ######################################################################### |
709 |
709 |
710 |
710 |
def continuity(url):
    """Fetch *url* three ways and print an md5 digest for each fetch.

    Used as a self-test: downloads the resource with the stock urllib
    handlers, then twice through the keepalive HTTPHandler (bulk read()
    and line-by-line readline()).  All three digests should match if the
    keepalive machinery is not corrupting data.

    Side effects: installs openers globally via urlreq.installopener()
    and prints results to stdout.
    """
    md5 = hashlib.md5
    format = b'%25s: %s'

    # first fetch the file with the normal http handler
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(format % (b'normal urllib', node.hex(m.digest())))

    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)

    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(format % (b'keepalive read', node.hex(m.digest())))

    # exercise the readline() path of the keepalive response object
    fo = urlreq.urlopen(url)
    foo = b''
    while True:
        f = fo.readline()
        if f:
            foo = foo + f
        else:
            break
    fo.close()
    m = md5(foo)
    print(format % (b'keepalive readline', node.hex(m.digest())))
746 |
746 |
def comp(N, url):
    """Benchmark N fetches of *url* with and without keepalive.

    Times ``fetch(N, url)`` once using the default urllib handlers and
    once with the keepalive HTTPHandler installed, then prints both
    timings and the speedup factor (t1 / t2).

    Side effects: installs openers globally and writes to stdout.
    """
    print(b' making %i connections to:\n %s' % (N, url))

    procutil.stdout.write(b' first using the normal urllib handlers')
    # first use normal opener
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    t1 = fetch(N, url)
    print(b' TIME: %.3f s' % t1)

    procutil.stdout.write(b' now using the keepalive handler ')
    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)
    t2 = fetch(N, url)
    print(b' TIME: %.3f s' % t2)
    print(b' improvement factor: %.2f' % (t1 / t2))
766 def fetch(N, url, delay=0): |
766 def fetch(N, url, delay=0): |
767 import time |
767 import time |
768 |
768 |
795 print(msg % args) |
795 print(msg % args) |
796 |
796 |
797 info = warning = error = debug |
797 info = warning = error = debug |
798 |
798 |
799 DEBUG = FakeLogger() |
799 DEBUG = FakeLogger() |
800 print(" fetching the file to establish a connection") |
800 print(b" fetching the file to establish a connection") |
801 fo = urlreq.urlopen(url) |
801 fo = urlreq.urlopen(url) |
802 data1 = fo.read() |
802 data1 = fo.read() |
803 fo.close() |
803 fo.close() |
804 |
804 |
805 i = 20 |
805 i = 20 |
806 print(" waiting %i seconds for the server to close the connection" % i) |
806 print(b" waiting %i seconds for the server to close the connection" % i) |
807 while i > 0: |
807 while i > 0: |
808 procutil.stdout.write('\r %2i' % i) |
808 procutil.stdout.write(b'\r %2i' % i) |
809 procutil.stdout.flush() |
809 procutil.stdout.flush() |
810 time.sleep(1) |
810 time.sleep(1) |
811 i -= 1 |
811 i -= 1 |
812 procutil.stderr.write('\r') |
812 procutil.stderr.write(b'\r') |
813 |
813 |
814 print(" fetching the file a second time") |
814 print(b" fetching the file a second time") |
815 fo = urlreq.urlopen(url) |
815 fo = urlreq.urlopen(url) |
816 data2 = fo.read() |
816 data2 = fo.read() |
817 fo.close() |
817 fo.close() |
818 |
818 |
819 if data1 == data2: |
819 if data1 == data2: |
820 print(' data are identical') |
820 print(b' data are identical') |
821 else: |
821 else: |
822 print(' ERROR: DATA DIFFER') |
822 print(b' ERROR: DATA DIFFER') |
823 |
823 |
824 DEBUG = dbbackup |
824 DEBUG = dbbackup |
825 |
825 |
826 |
826 |
def test(url, N=10):
    """Run the full keepalive self-test suite against *url*.

    Runs, in order: the data-integrity check (continuity), the speed
    comparison with N connections (comp), and the dropped-connection
    check (test_timeout).  Output goes to stdout.
    """
    print(b"performing continuity test (making sure stuff isn't corrupted)")
    continuity(url)
    print(b'')
    print(b"performing speed comparison")
    comp(N, url)
    print(b'')
    print(b"performing dropped-connection check")
    test_timeout(url)
837 |
837 |
if __name__ == '__main__':
    # Script entry point: ``python keepalive.py <N> <url>`` runs the
    # self-test suite with N connections against the given URL.
    import time

    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        # Missing or non-integer argument: print usage instead of a traceback.
        print(b"%s <integer> <url>" % sys.argv[0])
    else:
        test(url, N)