22# source of latest URLs: https://gitlab.com/libosinfo/osinfo-db
33
44from datetime import datetime
5+ import hashlib
6+ import math
57import os
68import re
7- import shutil
89import sys
9- import time
1010from urllib .parse import urlparse
11- from urllib .request import urlopen
1211
1312from loguru import logger
1413from minio import Minio
@@ -105,14 +104,14 @@ def get_latest_debubu(shortname, latest_checksum_url, latest_url, checksum_type=
# Maps image shortname -> getter that resolves the latest release.
# Each getter returns (checksum, url, version) for the newest image
# (see the unpacking in update_image). Debian and Ubuntu share the
# "debubu" getter; the others use the default one.
IMAGES = {
    "almalinux": get_latest_default,
    "centos": get_latest_default,
    "debian": get_latest_debubu,
    "rockylinux": get_latest_default,
    "ubuntu": get_latest_debubu,
}
112111
113112
114113def mirror_image (
115- image , minio_server , minio_bucket , minio_access_key , minio_secret_key
114+ image , extracted_file , minio_server , minio_bucket , minio_access_key , minio_secret_key
116115):
117116 client = Minio (
118117 minio_server ,
@@ -121,14 +120,7 @@ def mirror_image(
121120 )
122121
123122 version = image ["versions" ][0 ]
124-
125- path = urlparse (version ["url" ])
126123 dirname = image ["shortname" ]
127- filename , fileextension = os .path .splitext (os .path .basename (path .path ))
128-
129- if fileextension not in [".bz2" , ".zip" , ".xz" , ".gz" ]:
130- filename += fileextension
131-
132124 shortname = image ["shortname" ]
133125 format = image ["format" ]
134126 new_version = version ["version" ]
@@ -139,23 +131,73 @@ def mirror_image(
139131 logger .info ("'%s' available in '%s'" % (new_filename , dirname ))
140132 except S3Error :
141133 logger .info ("'%s' not yet available in '%s'" % (new_filename , dirname ))
142- logger .info ("Downloading '%s'" % version ["url" ])
143- response = requests .get (version ["url" ], stream = True )
144- with open (os .path .basename (path .path ), "wb" ) as fp :
145- shutil .copyfileobj (response .raw , fp )
146- del response
147-
148- if fileextension in [".bz2" , ".zip" , ".xz" , ".gz" ]:
149- logger .info ("Decompressing '%s'" % os .path .basename (path .path ))
150- patoolib .extract_archive (os .path .basename (path .path ), outdir = "." )
151- os .remove (os .path .basename (path .path ))
152134
153135 logger .info (
154- "Uploading '%s' to '%s' as '%s'" % (filename , dirname , new_filename )
136+ "Uploading '%s' to '%s' as '%s'" % (extracted_file , dirname , new_filename )
155137 )
156138
157- client .fput_object (minio_bucket , os .path .join (dirname , new_filename ), filename )
158- os .remove (filename )
139+ client .fput_object (minio_bucket , os .path .join (dirname , new_filename ), extracted_file )
140+
141+
def size_clean(size):
    """Return *size* (a byte count) as a human-readable string, e.g. '1.50 KiB'.

    Sizes below one byte (including 0, a perfectly valid Content-Length)
    are reported directly in bytes: math.log(0, 1024) would raise a
    math domain error.
    """
    size_name = ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")
    if size < 1:
        return f"{size:.2f} B"
    i = int(math.floor(math.log(size, 1024)))
    s = size / 1024 ** i
    return f"{s:.2f} {size_name[i]}"
147+
148+
def download_and_hash(download_url: str):
    """Download an image, extract it if compressed, and compute its SHA-512.

    Streams the download to disk with progress logging. Plain images are
    hashed while streaming; archives (.bz2/.zip/.xz/.gz) are extracted
    first and the extracted file is hashed in a second pass.

    Returns a tuple (http_headers, "sha512:<hexdigest>", local_filename),
    or (None, None, None) when the download fails or the derived
    filename looks unsafe.
    """
    path = urlparse(download_url)
    filename, fileextension = os.path.splitext(os.path.basename(path.path))
    is_archive = fileextension in [".bz2", ".zip", ".xz", ".gz"]
    if not is_archive:
        # not compressed: the "extension" is part of the real filename
        filename += fileextension
    download_filename = os.path.basename(path.path)
    hash_obj = hashlib.new("sha512")

    with requests.get(url=download_url, stream=True, timeout=30) as response:
        if response.status_code != 200:
            logger.error(f"Downloading image '{download_url}' failed with error code {response.status_code}")
            return None, None, None

        http_headers = response.headers
        # Content-Length may be absent (e.g. chunked transfer encoding);
        # fall back to 0 and skip progress reporting in that case.
        file_size = int(http_headers.get("Content-Length", 0))
        if file_size:
            logger.info(f"Image size {size_clean(file_size)}")

        downloaded_bytes = 0
        last_progress = 0
        with open(download_filename, "wb") as fp:
            for chunk in response.iter_content(chunk_size=8192):
                # count the real chunk length: the final chunk is
                # usually shorter than 8192 bytes
                downloaded_bytes += len(chunk)
                if file_size:
                    progress_percent = (downloaded_bytes / file_size) * 100
                    progress = round(min(max(progress_percent, 0), 100))
                    if progress - last_progress >= 5:
                        logger.info(f"Downloading image: {progress}%")
                        last_progress = progress

                fp.write(chunk)

                if not is_archive:
                    # hash while streaming to avoid a second read pass
                    hash_obj.update(chunk)

    if not is_archive:
        sha512 = hash_obj.hexdigest()
        return http_headers, f"sha512:{sha512}", download_filename

    # Explicit check instead of `assert`: assertions are stripped under
    # `python -O`, and we are about to os.remove() this name.
    if download_filename in ["", ".", " ", "/", ".."]:
        logger.error(f"Refusing to process suspicious filename '{download_filename}'")
        return None, None, None

    logger.info("Decompressing '%s'" % download_filename)
    patoolib.extract_archive(download_filename, outdir=".")
    os.remove(download_filename)

    # hash the extracted image in chunks to keep memory bounded
    with open(filename, "rb") as fp:
        chunk = fp.read(8192)
        while chunk:
            hash_obj.update(chunk)
            chunk = fp.read(8192)

    sha512 = hash_obj.hexdigest()
    return http_headers, f"sha512:{sha512}", filename
159201
160202
161203def update_image (image , getter , minio_server , minio_bucket , minio_access_key , minio_secret_key ):
@@ -167,7 +209,7 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi
167209
168210 latest_checksum_url = image ["latest_checksum_url" ]
169211 logger .info (f"Getting checksums from { latest_checksum_url } " )
170-
212+
171213 shortname = image ["shortname" ]
172214 current_checksum , current_url , current_version = getter (shortname , latest_checksum_url , latest_url )
173215
@@ -181,6 +223,7 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi
181223 "checksum" : None ,
182224 "url" : None ,
183225 "version" : None ,
226+ "verify_checksum" : None ,
184227 }
185228 )
186229
@@ -191,12 +234,20 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi
191234 logger .info (f"Image { name } is up-to-date, nothing to do" )
192235 return 0
193236
237+ logger .info (f"Image { name } change detected. Downloading Image..." )
238+
239+ headers , verify_checksum , extracted_file = download_and_hash (current_url )
240+ if verify_checksum is None or extracted_file in ["" , "." , " " , "/" , ".." ]:
241+ logger .error (f"Downloading and hashing { name } failed" )
242+ return 0
243+
244+ logger .info (f"Image { name } has the verification checksum { verify_checksum } " )
245+
194246 if current_version is None :
195- logger .info (f"Checking { current_url } " )
247+ logger .info ("Using HTTP 'last-modified' header as current version " )
196248
197- conn = urlopen (current_url , timeout = 30 )
198249 dt = datetime .strptime (
199- conn . headers ["last-modified" ], "%a, %d %b %Y %H:%M:%S %Z"
250+ headers ["last-modified" ], "%a, %d %b %Y %H:%M:%S %Z"
200251 )
201252 current_version = dt .strftime ("%Y%m%d" )
202253
@@ -205,6 +256,7 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi
205256 "build_date" : datetime .strptime (current_version , "%Y%m%d" ).date (),
206257 "checksum" : current_checksum ,
207258 "url" : current_url ,
259+ "verify_checksum" : verify_checksum ,
208260 }
209261 logger .info (f"New values are { new_values } " )
210262 image ["versions" ][0 ].update (new_values )
@@ -220,11 +272,15 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi
220272
221273 mirror_image (
222274 image ,
275+ extracted_file ,
223276 minio_server ,
224277 minio_bucket ,
225278 minio_access_key ,
226279 minio_secret_key ,
227280 )
281+
282+ os .remove (extracted_file )
283+
228284 return 1
229285
230286
0 commit comments