Skip to content

Commit 5868e13

Browse files
committed
feat: Improve data loading and add unit tests for data_loader
1 parent 6c6a1fd commit 5868e13

2 files changed

Lines changed: 46 additions & 10 deletions

File tree

src/data_loader.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
Licensed under GNU LGPL.3, see LICENCE file
88
'''
99

10-
11-
1210
import os
1311
from typing import Optional, Union, Any
1412
import pandas as pd
def _read_dated_csv(file_path: str) -> pd.DataFrame:
    '''Reads a comma-separated file into a DataFrame indexed by the dates in its first column'''

    # NOTE(review): parse_dates=True lets pandas guess the date layout before the
    # explicit format below is applied; if the guess succeeds, the explicit
    # format is effectively a no-op. Confirm against the real files whether an
    # explicit-only parse would be preferable.
    frame = pd.read_csv(file_path,
                        sep=',',
                        index_col=0,
                        header=0,
                        parse_dates=True)
    frame.index = pd.to_datetime(frame.index, format='%d-%m-%Y')
    return frame


def load_data_msci(path: Optional[str] = None, n: int = 24) -> dict[str, pd.DataFrame]:
    '''Loads MSCI daily returns data from 1999-01-01 to 2023-04-18

    Parameters
    ----------
    path:
        Directory containing 'msci_country_indices.csv' and 'NDDLWI.csv'.
        Defaults to the 'data' folder inside the current working directory.
        A trailing path separator is optional.
    n:
        Number of leading columns of the country index file to keep.

    Returns
    -------
    dict with two entries:
        'return_series': the first n columns of the country index file,
        'bm_series': the full content of the world index (NDDLWI) file.
    Both frames are indexed by date.
    '''

    path = os.path.join(os.getcwd(), f'data{os.sep}') if path is None else path

    # --- FILE 1: MSCI country indices, restricted to the first n series ---
    df = _read_dated_csv(os.path.join(path, 'msci_country_indices.csv'))
    series_id = df.columns[0:n]
    X = df[series_id]

    # --- FILE 2: World index (NDDLWI) ---
    # Built with os.path.join (not string concatenation) so that `path` works
    # with or without a trailing separator, exactly like the first file.
    y = _read_dated_csv(os.path.join(path, 'NDDLWI.csv'))

    return {'return_series': X, 'bm_series': y}

test/tests_data_loader.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import sys
2+
import os
3+
import unittest
4+
import pandas as pd
5+
import numpy as np
6+
7+
sys.path.insert(1, 'src')
8+
9+
from data_loader import load_data_msci
10+
11+
class TestDataLoader(unittest.TestCase):
    '''Smoke test for load_data_msci against the CSV files in ./data'''

    def setUp(self):
        # Runs before each test; the data folder is resolved relative to the
        # current working directory, so the suite must be started from the
        # directory that contains 'data/'.
        self.data_path = os.path.join(os.getcwd(), 'data/')

    def test_load_data_msci(self):
        '''The loader returns a dict holding two non-empty DataFrames'''
        # Deliberately no try/except around this body: catching Exception would
        # also catch AssertionError, turning every failed assertion into a
        # generic "load failed" message and discarding the traceback. Letting
        # errors propagate gives unittest the precise failure location.
        data = load_data_msci(self.data_path)

        self.assertIsNotNone(data)
        self.assertIsInstance(data, dict)

        for key in ('return_series', 'bm_series'):
            self.assertIn(key, data)
            self.assertIsInstance(data[key], pd.DataFrame)
            self.assertFalse(data[key].empty)


# Allows running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()

0 commit comments

Comments
 (0)