Skip to content

Commit 7507692

Browse files
committed
KPMP-6566: try chunks
1 parent 6d84115 commit 7507692

1 file changed

Lines changed: 9 additions & 5 deletions

File tree

data_management/services/dlu_filesystem.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,19 @@
15 15

16 16

17 17
def calculate_checksum(file_path: str):
18-
19 18
if os.path.isdir(file_path):
20 19
return "0"
20+
21 21
if os.path.getsize(file_path) == 0:
22-
# This is apparently the md5 returned for an empty file
23 22
return 'd41d8cd98f00b204e9800998ecf8427e'
24-
elif ".zarr" not in file_path:
25-
with open(file_path) as f, mmap(f.fileno(), 0, access=ACCESS_READ) as f:
26-
return md5(f).hexdigest()
23+
24+
if ".zarr" not in file_path:
25+
hash_md5 = hashlib.md5()
26+
with open(file_path, "rb") as f:
27+
# Read in 1MB chunks to keep RAM usage low
28+
for chunk in iter(lambda: f.read(1024 * 1024), b""):
29+
hash_md5.update(chunk)
30+
return hash_md5.hexdigest()
27 31
else:
28 32
return compute_zarr_checksum(yield_files_local(file_path)).md5
29 33

0 commit comments

Comments
 (0)