Skip to content

Commit 413ec16

Browse files
authored
Merge pull request #21 from SimFin/DEV-606-python-api-dont-apply-filter-eve
Dev 606 python api dont apply filter eve
2 parents 83b4276 + e59ad5f commit 413ec16

4 files changed

Lines changed: 24 additions & 6 deletions

File tree

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
# This is also defined in simfin/__init__.py and must be
55
# updated in both places.
6-
MY_VERSION = '0.9.2'
6+
MY_VERSION = '0.9.3'
77

88
setup(
99
name='simfin',

simfin/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# This is also defined in setup.py and must be updated in both places.
2-
__version__ = "0.9.1"
2+
__version__ = "0.9.3"
33

44
# Expose the following as top-level imports.
55

simfin/utils.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# www.simfin.com - www.github.com/simfin/simfin
88
# See README.md for instructions and LICENSE.txt for license details.
99
##########################################################################
10+
import hashlib
1011

1112
import pandas as pd
1213
import os
@@ -429,15 +430,33 @@ def _condition_function(start_date=None, end_date=None):
429430
return None
430431

431432

433+
def _calculate_file_hash(file_path, start_date=None, end_date=None, chunk_size=8192):
    """
    Calculate a SHA-256 digest of a file's contents combined with an
    optional date-range.

    The bytes of the file are hashed first, then `start_date` and finally
    `end_date`, each formatted as YYYY-MM-DD. A single "-" is hashed in
    place of a date that is None, so the digest changes whenever the file
    contents or either of the dates change.

    :param file_path:
        Path of the file to hash. It is opened in binary mode.

    :param start_date:
        Object with a `strftime` method (e.g. `datetime.date`) or None.

    :param end_date:
        Object with a `strftime` method (e.g. `datetime.date`) or None.

    :param chunk_size:
        Number of bytes to read from the file at a time, so the whole
        file need not be held in memory. Must not be 0.

    :return:
        String with the hexadecimal SHA-256 digest.

    :raises ValueError:
        If `chunk_size` is 0.
    """
    # Reading 0 bytes returns b"" immediately, which would end the loop
    # below before any data was hashed and silently give a digest that
    # ignores the file's contents.
    if chunk_size == 0:
        raise ValueError("chunk_size must not be 0")

    hash_obj = hashlib.sha256()

    with open(file_path, 'rb') as file:
        # Two-argument iter() keeps calling read() until it returns the
        # sentinel b"" at end-of-file.
        for chunk in iter(lambda: file.read(chunk_size), b""):
            hash_obj.update(chunk)

    # Placeholder hashed instead of a missing date.
    blank_append = "-".encode('utf-8')

    # The order matters: start-date first, then end-date.
    for date in (start_date, end_date):
        if date is not None:
            hash_obj.update(date.strftime("%Y-%m-%d").encode('utf-8'))
        else:
            hash_obj.update(blank_append)

    return hash_obj.hexdigest()
448+
449+
432450
def _filtered_file(dataset_path, start_date=None, end_date=None):
433451
start_date = _into_date(start_date)
434452
end_date = _into_date(end_date)
435-
453+
hash_of_file = _calculate_file_hash(dataset_path, start_date, end_date)
436454
con_fun = _condition_function(start_date, end_date)
437455
# write new data file
438-
new_file_name = os.path.basename(dataset_path)[0:-4] + "_filtered.csv"
456+
new_file_name = os.path.basename(dataset_path)[0:-4] + f"_{hash_of_file}.csv"
439457
new_file_path = os.path.join(os.path.dirname(dataset_path), new_file_name)
440-
458+
if os.path.exists(new_file_path):
459+
return new_file_path
441460
with open(new_file_path, 'w', newline='') as csvfile:
442461
writer = csv.writer(csvfile, delimiter=';', quotechar='"')
443462

tests/test_load.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,6 @@ def test_load_industries():
263263
##########################################################################
264264
def test_load_all():
265265
for i, item in enumerate(all_dataset_test):
266-
#print(i, item)
267266
t = sf.load(dataset=item["dataset"],
268267
variant=item["variant"],
269268
market=item["market"],

0 commit comments

Comments
 (0)