|
7 | 7 | # www.simfin.com - www.github.com/simfin/simfin |
8 | 8 | # See README.md for instructions and LICENSE.txt for license details. |
9 | 9 | ########################################################################## |
| 10 | +import hashlib |
10 | 11 |
|
11 | 12 | import pandas as pd |
12 | 13 | import os |
@@ -429,15 +430,33 @@ def _condition_function(start_date=None, end_date=None): |
429 | 430 | return None |
430 | 431 |
|
431 | 432 |
|
def _calculate_file_hash(file_path, start_date=None, end_date=None, chunk_size=8192):
    """
    Return a SHA-256 hex digest of a file's contents and a date range.

    The digest covers the raw bytes of the file, followed by the start date
    and then the end date, each formatted as ``YYYY-MM-DD``. A date that is
    None contributes a single ``-`` instead, so the same file hashed with
    different date ranges yields different digests.

    :param file_path: Path of the file to hash. It is opened in binary mode.
    :param start_date: Object with a ``strftime`` method, or None.
    :param end_date: Object with a ``strftime`` method, or None.
    :param chunk_size: Number of bytes to read from the file per call.
    :return: Hexadecimal digest string.
    :raises ValueError: If ``chunk_size`` is 0.
    """
    # read(0) returns b"" immediately, which would end the loop below before
    # any file content is hashed and make every file produce the same digest.
    if chunk_size == 0:
        raise ValueError("chunk_size must not be 0")

    hash_obj = hashlib.sha256()

    # Read in chunks so large files are never loaded into memory at once.
    # The two-argument form of iter() stops when read() returns b"" at EOF.
    with open(file_path, 'rb') as file:
        for chunk in iter(lambda: file.read(chunk_size), b''):
            hash_obj.update(chunk)

    # Append the date range; order matters (start first, then end).
    for date in (start_date, end_date):
        date_str = "-" if date is None else date.strftime("%Y-%m-%d")
        hash_obj.update(date_str.encode('utf-8'))

    return hash_obj.hexdigest()
| 448 | + |
| 449 | + |
432 | 450 | def _filtered_file(dataset_path, start_date=None, end_date=None): |
433 | 451 | start_date = _into_date(start_date) |
434 | 452 | end_date = _into_date(end_date) |
435 | | - |
| 453 | + hash_of_file = _calculate_file_hash(dataset_path, start_date, end_date) |
436 | 454 | con_fun = _condition_function(start_date, end_date) |
437 | 455 | # write new data file |
438 | | - new_file_name = os.path.basename(dataset_path)[0:-4] + "_filtered.csv" |
| 456 | + new_file_name = os.path.basename(dataset_path)[0:-4] + f"_{hash_of_file}.csv" |
439 | 457 | new_file_path = os.path.join(os.path.dirname(dataset_path), new_file_name) |
440 | | - |
| 458 | + if os.path.exists(new_file_path): |
| 459 | + return new_file_path |
441 | 460 | with open(new_file_path, 'w', newline='') as csvfile: |
442 | 461 | writer = csv.writer(csvfile, delimiter=';', quotechar='"') |
443 | 462 |
|
|
0 commit comments