contrib/packaging/packagingutil.py
changeset 41853 d7dc4ac1ff84
equal deleted inserted replaced
41852:db3098d02a6d 41853:d7dc4ac1ff84
       
     1 # packagingutil.py - Common packaging utility code.
       
     2 #
       
     3 # Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com>
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 
       
     8 # no-check-code because Python 3 native.
       
     9 
       
    10 import gzip
       
    11 import hashlib
       
    12 import pathlib
       
    13 import tarfile
       
    14 import urllib.request
       
    15 import zipfile
       
    16 
       
    17 
       
    18 def hash_path(p: pathlib.Path):
       
    19     h = hashlib.sha256()
       
    20 
       
    21     with p.open('rb') as fh:
       
    22         while True:
       
    23             chunk = fh.read(65536)
       
    24             if not chunk:
       
    25                 break
       
    26 
       
    27             h.update(chunk)
       
    28 
       
    29     return h.hexdigest()
       
    30 
       
    31 
       
    32 class IntegrityError(Exception):
       
    33     """Represents an integrity error when downloading a URL."""
       
    34 
       
    35 
       
    36 def secure_download_stream(url, size, sha256):
       
    37     """Securely download a URL to a stream of chunks.
       
    38 
       
    39     If the integrity of the download fails, an IntegrityError is
       
    40     raised.
       
    41     """
       
    42     h = hashlib.sha256()
       
    43     length = 0
       
    44 
       
    45     with urllib.request.urlopen(url) as fh:
       
    46         if not url.endswith('.gz') and fh.info().get('Content-Encoding') == 'gzip':
       
    47             fh = gzip.GzipFile(fileobj=fh)
       
    48 
       
    49         while True:
       
    50             chunk = fh.read(65536)
       
    51             if not chunk:
       
    52                 break
       
    53 
       
    54             h.update(chunk)
       
    55             length += len(chunk)
       
    56 
       
    57             yield chunk
       
    58 
       
    59     digest = h.hexdigest()
       
    60 
       
    61     if length != size:
       
    62         raise IntegrityError('size mismatch on %s: wanted %d; got %d' % (
       
    63             url, size, length))
       
    64 
       
    65     if digest != sha256:
       
    66         raise IntegrityError('sha256 mismatch on %s: wanted %s; got %s' % (
       
    67             url, sha256, digest))
       
    68 
       
    69 
       
    70 def download_to_path(url: str, path: pathlib.Path, size: int, sha256: str):
       
    71     """Download a URL to a filesystem path, possibly with verification."""
       
    72 
       
    73     # We download to a temporary file and rename at the end so there's
       
    74     # no chance of the final file being partially written or containing
       
    75     # bad data.
       
    76     print('downloading %s to %s' % (url, path))
       
    77 
       
    78     if path.exists():
       
    79         good = True
       
    80 
       
    81         if path.stat().st_size != size:
       
    82             print('existing file size is wrong; removing')
       
    83             good = False
       
    84 
       
    85         if good:
       
    86             if hash_path(path) != sha256:
       
    87                 print('existing file hash is wrong; removing')
       
    88                 good = False
       
    89 
       
    90         if good:
       
    91             print('%s exists and passes integrity checks' % path)
       
    92             return
       
    93 
       
    94         path.unlink()
       
    95 
       
    96     tmp = path.with_name('%s.tmp' % path.name)
       
    97 
       
    98     try:
       
    99         with tmp.open('wb') as fh:
       
   100             for chunk in secure_download_stream(url, size, sha256):
       
   101                 fh.write(chunk)
       
   102     except IntegrityError:
       
   103         tmp.unlink()
       
   104         raise
       
   105 
       
   106     tmp.rename(path)
       
   107     print('successfully downloaded %s' % url)
       
   108 
       
   109 
       
   110 def download_entry(entry: dict, dest_path: pathlib.Path, local_name=None) -> pathlib.Path:
       
   111     url = entry['url']
       
   112 
       
   113     local_name = local_name or url[url.rindex('/') + 1:]
       
   114 
       
   115     local_path = dest_path / local_name
       
   116     download_to_path(url, local_path, entry['size'], entry['sha256'])
       
   117 
       
   118     return local_path
       
   119 
       
   120 
       
   121 def extract_tar_to_directory(source: pathlib.Path, dest: pathlib.Path):
       
   122     with tarfile.open(source, 'r') as tf:
       
   123         tf.extractall(dest)
       
   124 
       
   125 
       
   126 def extract_zip_to_directory(source: pathlib.Path, dest: pathlib.Path):
       
   127     with zipfile.ZipFile(source, 'r') as zf:
       
   128         zf.extractall(dest)