|
# packagingutil.py - Common packaging utility code.
#
# Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

# no-check-code because Python 3 native.
|
9 |
|
10 import gzip |
|
11 import hashlib |
|
12 import pathlib |
|
13 import tarfile |
|
14 import urllib.request |
|
15 import zipfile |
|
16 |
|
17 |
|
def hash_path(p: pathlib.Path):
    """Return the hex-encoded SHA-256 digest of the file at ``p``.

    The file is read in 64 KiB pieces so its content is never held in
    memory all at once.
    """
    hasher = hashlib.sha256()

    with p.open('rb') as source:
        # A binary read returns b'' at end of file, which stops iteration.
        for block in iter(lambda: source.read(65536), b''):
            hasher.update(block)

    return hasher.hexdigest()
|
30 |
|
31 |
|
class IntegrityError(Exception):
    """Raised when downloaded content fails its size or SHA-256 check."""
|
34 |
|
35 |
|
def secure_download_stream(url, size, sha256):
    """Yield the content of ``url`` in chunks while verifying it.

    Chunks of up to 64 KiB are hashed and counted as they are yielded.
    When the response declares ``Content-Encoding: gzip`` and the URL
    itself does not end in ``.gz``, the body is decompressed first, so
    the size and digest apply to the decompressed bytes.

    Verification can only happen once the stream is exhausted: after the
    final chunk, an IntegrityError is raised if the total length differs
    from ``size`` or the hex digest differs from ``sha256``. Consumers
    must therefore discard what they received if that error surfaces.
    """
    hasher = hashlib.sha256()
    received = 0

    with urllib.request.urlopen(url) as response:
        stream = response

        # Only undo transport-level gzip; a ``.gz`` URL is expected to
        # be delivered (and hashed) in its compressed form.
        if not url.endswith('.gz'):
            if response.info().get('Content-Encoding') == 'gzip':
                stream = gzip.GzipFile(fileobj=response)

        chunk = stream.read(65536)
        while chunk:
            hasher.update(chunk)
            received += len(chunk)

            yield chunk

            chunk = stream.read(65536)

    actual = hasher.hexdigest()

    if received != size:
        raise IntegrityError('size mismatch on %s: wanted %d; got %d' % (
            url, size, received))

    if actual != sha256:
        raise IntegrityError('sha256 mismatch on %s: wanted %s; got %s' % (
            url, sha256, actual))
|
68 |
|
69 |
|
def download_to_path(url: str, path: pathlib.Path, size: int, sha256: str):
    """Download a URL to a filesystem path, verifying size and SHA-256.

    If ``path`` already exists and matches both ``size`` and ``sha256``,
    nothing is downloaded. If it exists but fails either check, it is
    removed and fetched again.

    Raises IntegrityError (propagated from secure_download_stream) when
    the downloaded content does not match ``size`` or ``sha256``. Any
    other error raised while downloading or writing is propagated as-is.
    In every failure case the temporary file is removed and ``path`` is
    not created.
    """
    print('downloading %s to %s' % (url, path))

    if path.exists():
        # Compare sizes first: it is cheap and skips hashing a file that
        # is already known to be wrong.
        if path.stat().st_size != size:
            print('existing file size is wrong; removing')
        elif hash_path(path) != sha256:
            print('existing file hash is wrong; removing')
        else:
            print('%s exists and passes integrity checks' % path)
            return

        path.unlink()

    # We download to a temporary file and rename at the end so there's
    # no chance of the final file being partially written or containing
    # bad data.
    tmp = path.with_name('%s.tmp' % path.name)

    try:
        with tmp.open('wb') as fh:
            for chunk in secure_download_stream(url, size, sha256):
                fh.write(chunk)
    except BaseException:
        # Clean up on any failure, not only integrity errors, so network
        # errors or an interrupt do not leave a stale partial file. The
        # file may not exist if opening it was what failed.
        try:
            tmp.unlink()
        except FileNotFoundError:
            pass
        raise

    tmp.rename(path)
    print('successfully downloaded %s' % url)
|
108 |
|
109 |
|
def download_entry(entry: dict, dest_path: pathlib.Path, local_name=None) -> pathlib.Path:
    """Download the file described by ``entry`` into ``dest_path``.

    ``entry`` must provide the ``url``, ``size`` and ``sha256`` keys.
    When ``local_name`` is not given, the file is named after the part
    of the URL following its last ``/``.

    Returns the path of the downloaded file.
    """
    source_url = entry['url']

    if not local_name:
        # Everything after the final slash of the URL.
        local_name = source_url[source_url.rindex('/') + 1:]

    target = dest_path / local_name
    download_to_path(source_url, target, entry['size'], entry['sha256'])

    return target
|
119 |
|
120 |
|
def extract_tar_to_directory(source: pathlib.Path, dest: pathlib.Path):
    """Unpack every member of the tar archive ``source`` into ``dest``."""
    with tarfile.open(name=source, mode='r') as archive:
        archive.extractall(path=dest)
|
124 |
|
125 |
|
def extract_zip_to_directory(source: pathlib.Path, dest: pathlib.Path):
    """Unpack every member of the zip archive ``source`` into ``dest``."""
    with zipfile.ZipFile(file=source, mode='r') as archive:
        archive.extractall(path=dest)