Skip to content

Commit 6a1dd54

Browse files
committed
Force update script to download the image and calculate SHA512
Since we want to check the hash against the OpenStack backend, we need the SHA512 of every image. Sadly, most image creators do not provide that hash pre-computed. That means the CI worker has to download the image and compute the SHA512 itself for every new image update.
Signed-off-by: Gondermann <gondermann@b1-systems.de>
1 parent 5bc0135 commit 6a1dd54

1 file changed

Lines changed: 86 additions & 30 deletions

File tree

openstack_image_manager/update.py

Lines changed: 86 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,12 @@
22
# source of latest URLs: https://gitlab.com/libosinfo/osinfo-db
33

44
from datetime import datetime
5+
import hashlib
6+
import math
57
import os
68
import re
7-
import shutil
89
import sys
9-
import time
1010
from urllib.parse import urlparse
11-
from urllib.request import urlopen
1211

1312
from loguru import logger
1413
from minio import Minio
@@ -105,14 +104,14 @@ def get_latest_debubu(shortname, latest_checksum_url, latest_url, checksum_type=
105104
IMAGES = {
106105
"almalinux": get_latest_default,
107106
"centos": get_latest_default,
108-
"debian": get_latest_debubu,
109-
"rockylinux": get_latest_default,
107+
"debian": get_latest_debubu,
108+
"rockylinux": get_latest_default,
110109
"ubuntu": get_latest_debubu,
111110
}
112111

113112

114113
def mirror_image(
115-
image, minio_server, minio_bucket, minio_access_key, minio_secret_key
114+
image, extracted_file, minio_server, minio_bucket, minio_access_key, minio_secret_key
116115
):
117116
client = Minio(
118117
minio_server,
@@ -121,14 +120,7 @@ def mirror_image(
121120
)
122121

123122
version = image["versions"][0]
124-
125-
path = urlparse(version["url"])
126123
dirname = image["shortname"]
127-
filename, fileextension = os.path.splitext(os.path.basename(path.path))
128-
129-
if fileextension not in [".bz2", ".zip", ".xz", ".gz"]:
130-
filename += fileextension
131-
132124
shortname = image["shortname"]
133125
format = image["format"]
134126
new_version = version["version"]
@@ -139,23 +131,73 @@ def mirror_image(
139131
logger.info("'%s' available in '%s'" % (new_filename, dirname))
140132
except S3Error:
141133
logger.info("'%s' not yet available in '%s'" % (new_filename, dirname))
142-
logger.info("Downloading '%s'" % version["url"])
143-
response = requests.get(version["url"], stream=True)
144-
with open(os.path.basename(path.path), "wb") as fp:
145-
shutil.copyfileobj(response.raw, fp)
146-
del response
147-
148-
if fileextension in [".bz2", ".zip", ".xz", ".gz"]:
149-
logger.info("Decompressing '%s'" % os.path.basename(path.path))
150-
patoolib.extract_archive(os.path.basename(path.path), outdir=".")
151-
os.remove(os.path.basename(path.path))
152134

153135
logger.info(
154-
"Uploading '%s' to '%s' as '%s'" % (filename, dirname, new_filename)
136+
"Uploading '%s' to '%s' as '%s'" % (extracted_file, dirname, new_filename)
155137
)
156138

157-
client.fput_object(minio_bucket, os.path.join(dirname, new_filename), filename)
158-
os.remove(filename)
139+
client.fput_object(minio_bucket, os.path.join(dirname, new_filename), extracted_file)
140+
141+
142+
def size_clean(size):
    """Format a byte count as a human-readable string using binary units.

    The value is divided by 1024 until it drops below 1024 or the largest
    known unit (YiB) is reached, then rendered with two decimals, for
    example ``1536`` -> ``"1.50 KiB"``.

    A logarithm-based unit lookup fails for ``0`` (``math.log`` raises
    ``ValueError``), picks the wrong unit for values below one byte, and
    runs past the unit table for values of 1024**9 and above; the bounded
    loop used here handles all of those inputs.

    Args:
        size: Number of bytes (int or float).

    Returns:
        A string of the form ``"<value with two decimals> <unit>"``.
    """
    size_name = ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")
    value = float(size)
    index = 0
    # Stop at the last unit so very large values never index past the tuple.
    while abs(value) >= 1024 and index < len(size_name) - 1:
        value /= 1024
        index += 1
    return f"{value:.2f} {size_name[index]}"
147+
148+
149+
def download_and_hash(download_url: str):
    """Download an image, unpack it if it is an archive, and hash it.

    The file is streamed into the current working directory under the
    base name of the URL path. If that name ends in ``.bz2``, ``.zip``,
    ``.xz`` or ``.gz`` the download is extracted with patool, the archive
    is deleted, and the SHA512 is computed over the extracted file (which
    is expected to be named like the archive without its last extension).
    Otherwise the SHA512 is computed on the fly while downloading.

    Args:
        download_url: HTTP(S) URL of the image or image archive.

    Returns:
        A tuple ``(http_headers, checksum, local_filename)`` where
        ``checksum`` has the form ``"sha512:<hexdigest>"`` and
        ``local_filename`` is the file the checksum was computed over.
        ``(None, None, None)`` is returned when no file name can be
        derived from the URL or the server does not answer with HTTP 200.

    Raises:
        Whatever ``requests``, ``patoolib`` or the file system raise while
        transferring, extracting or reading the file. A partially written
        download is removed before the exception propagates.
    """
    path = urlparse(download_url)
    download_filename = os.path.basename(path.path)
    filename, fileextension = os.path.splitext(download_filename)
    is_archive = fileextension in [".bz2", ".zip", ".xz", ".gz"]
    if not is_archive:
        filename = download_filename

    # Validate up front with a real check: an ``assert`` is stripped under
    # ``python -O`` and would come too late once the file has been opened.
    if download_filename in ("", ".", ".."):
        logger.error(f"Cannot derive a file name from '{download_url}'")
        return None, None, None

    hash_obj = hashlib.new("sha512")

    with requests.get(url=download_url, stream=True, timeout=30) as response:
        if response.status_code != 200:
            logger.error(f"Downloading image '{download_url}' failed with error code {response.status_code}")
            return None, None, None

        http_headers = response.headers

        # Content-Length is optional (e.g. chunked transfer encoding), so a
        # missing or malformed value only disables the progress output.
        try:
            file_size = int(http_headers.get("Content-Length", 0))
        except ValueError:
            file_size = 0
        if file_size > 0:
            logger.info(f"Image size {size_clean(file_size)}")

        downloaded_bytes = 0
        last_progress = 0
        try:
            with open(download_filename, "wb") as fp:
                for chunk in response.iter_content(chunk_size=8192):
                    fp.write(chunk)
                    if not is_archive:
                        hash_obj.update(chunk)

                    # Count what was actually received; chunks may be
                    # shorter than the requested chunk size.
                    downloaded_bytes += len(chunk)
                    if file_size > 0:
                        progress = round(min(downloaded_bytes / file_size * 100, 100))
                        if progress - last_progress >= 5:
                            logger.info(f"Downloading image: {progress}%")
                            last_progress = progress
        except Exception:
            # Do not leave a truncated image behind in the working directory.
            if os.path.exists(download_filename):
                os.remove(download_filename)
            raise

    if not is_archive:
        return http_headers, f"sha512:{hash_obj.hexdigest()}", download_filename

    logger.info("Decompressing '%s'" % download_filename)
    try:
        patoolib.extract_archive(download_filename, outdir=".")
    finally:
        # The archive is no longer needed whether or not extraction worked.
        os.remove(download_filename)

    # The hash must cover the extracted image, not the compressed download.
    with open(filename, "rb") as fp:
        for chunk in iter(lambda: fp.read(8192), b""):
            hash_obj.update(chunk)

    return http_headers, f"sha512:{hash_obj.hexdigest()}", filename
159201

160202

161203
def update_image(image, getter, minio_server, minio_bucket, minio_access_key, minio_secret_key):
@@ -167,7 +209,7 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi
167209

168210
latest_checksum_url = image["latest_checksum_url"]
169211
logger.info(f"Getting checksums from {latest_checksum_url}")
170-
212+
171213
shortname = image["shortname"]
172214
current_checksum, current_url, current_version = getter(shortname, latest_checksum_url, latest_url)
173215

@@ -181,6 +223,7 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi
181223
"checksum": None,
182224
"url": None,
183225
"version": None,
226+
"verify_checksum": None,
184227
}
185228
)
186229

@@ -191,12 +234,20 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi
191234
logger.info(f"Image {name} is up-to-date, nothing to do")
192235
return 0
193236

237+
logger.info(f"Image {name} change detected. Downloading Image...")
238+
239+
headers, verify_checksum, extracted_file = download_and_hash(current_url)
240+
if verify_checksum is None or extracted_file in ["", ".", " ", "/", ".."]:
241+
logger.error(f"Downloading and hashing {name} failed")
242+
return 0
243+
244+
logger.info(f"Image {name} has the verification checksum {verify_checksum}")
245+
194246
if current_version is None:
195-
logger.info(f"Checking {current_url}")
247+
logger.info("Using HTTP 'last-modified' header as current version")
196248

197-
conn = urlopen(current_url, timeout=30)
198249
dt = datetime.strptime(
199-
conn.headers["last-modified"], "%a, %d %b %Y %H:%M:%S %Z"
250+
headers["last-modified"], "%a, %d %b %Y %H:%M:%S %Z"
200251
)
201252
current_version = dt.strftime("%Y%m%d")
202253

@@ -205,6 +256,7 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi
205256
"build_date": datetime.strptime(current_version, "%Y%m%d").date(),
206257
"checksum": current_checksum,
207258
"url": current_url,
259+
"verify_checksum": verify_checksum,
208260
}
209261
logger.info(f"New values are {new_values}")
210262
image["versions"][0].update(new_values)
@@ -220,11 +272,15 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi
220272

221273
mirror_image(
222274
image,
275+
extracted_file,
223276
minio_server,
224277
minio_bucket,
225278
minio_access_key,
226279
minio_secret_key,
227280
)
281+
282+
os.remove(extracted_file)
283+
228284
return 1
229285

230286

0 commit comments

Comments
 (0)