Skip to content

Commit 05fdfd1

Browse files
committed
added downloading of sunspots, started work on downloading of geomagnetic indexes and CBOE data
1 parent 55878f7 commit 05fdfd1

5 files changed

Lines changed: 83 additions & 5 deletions

File tree

moddata/_utils.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
Dataset: TypeAlias = Literal[
1414
"bankchurn",
1515
"btc",
16-
"pl_banking_stocks"
16+
"pl_banking_stocks",
17+
"sunspots",
18+
"geomagnetic_indexes"
1719
]
1820

1921

@@ -65,15 +67,21 @@ def _load_pl_banking_stocks() -> pd.DataFrame:
6567
))
6668

6769

70+
def _load_sunspots() -> pd.DataFrame:
    """Load the bundled sunspots dataset.

    Reads ``sunspots.parquet`` from the ``moddata.data`` package resources.

    Returns:
        The contents of the parquet file as a DataFrame.
    """
    resource = resources.files('moddata.data').joinpath('sunspots.parquet')
    # ``as_file`` yields a real filesystem path even when the package is not
    # unpacked on disk (e.g. imported from a zip); ``str(resource)`` does not
    # guarantee that.
    with resources.as_file(resource) as parquet_path:
        return pd.read_parquet(parquet_path)
74+
75+
6876
def load_data(dataset: Dataset) -> pd.DataFrame | None:
    """Load one of the bundled datasets by name.

    Args:
        dataset: Name of the dataset to load.

    Returns:
        The requested dataset as a DataFrame.

    Raises:
        NotImplementedError: For ``"geomagnetic_indexes"``, which has no
            loader yet.
        ValueError: If ``dataset`` is not a recognised dataset name.
    """
    if dataset == "bankchurn":
        return _load_bankchurn()
    if dataset == "btc":
        return _load_btc()
    if dataset == "pl_banking_stocks":
        return _load_pl_banking_stocks()
    if dataset == "sunspots":
        return _load_sunspots()
    if dataset == "geomagnetic_indexes":
        # NotImplementedError is still an Exception subclass, so callers that
        # catch Exception keep working, but the failure is now self-describing.
        raise NotImplementedError(
            "Loading of the 'geomagnetic_indexes' dataset is not implemented yet"
        )
    raise ValueError(f"Encountered invalid dataset name: {dataset}")
76-
77-
78-
if __name__ == "__main__":
79-
_load_btc()

moddata/data/sunspots.parquet

521 KB
Binary file not shown.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import pandas as pd
2+
3+
4+
class DownloadCboeDataExtractor:
    """Placeholder extractor for CBOE data; no extraction logic yet."""

    def extract(self):
        """Do nothing and return ``None``."""
        return None
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from typing import Final
2+
3+
import pandas as pd
4+
5+
6+
class DownloadGeomagneticIndexExtractor:
    """Extractor for the geomagnetic index file located at ``_DATA_URL``.

    Work in progress: the download/parsing step is not implemented yet.
    """

    # Location of the source text file.
    _DATA_URL: Final[str] = "https://kp.gfz.de/app/files/Kp_ap_Ap_SN_F107_since_1932.txt"

    def extract(self) -> pd.DataFrame:
        """Return the geomagnetic index data as a DataFrame.

        Raises:
            NotImplementedError: Always, until extraction is implemented.
        """
        # Fail loudly rather than silently returning None, which would
        # contradict the declared return type.
        raise NotImplementedError(
            "Geomagnetic index extraction is not implemented yet"
        )
12+
13+
14+
if __name__ == "__main__":
    # Manual smoke check: only constructs the extractor, nothing else runs.
    DownloadGeomagneticIndexExtractor()
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import logging
2+
from typing import Final
3+
4+
import numpy as np
5+
import pandas as pd
6+
7+
logger = logging.getLogger(__name__)
8+
9+
10+
class DownloadSunspotsExtractor:
11+
12+
_DAILY_TOTAL_SUNSPOT_NUMBER_URL: Final[str] = "https://www.sidc.be/SILSO/INFO/sndtotcsv.php"
13+
14+
def extract(self) -> pd.DataFrame:
15+
data = pd.read_csv(
16+
self._DAILY_TOTAL_SUNSPOT_NUMBER_URL,
17+
delimiter=";",
18+
decimal=".",
19+
names=[
20+
"year", "month", "day", "yearfrac_date",
21+
"daily_sunspots_number",
22+
"daily_std_across_stations",
23+
"obs_num", "is_definitive"
24+
],
25+
na_values=-1
26+
)
27+
data = data[["year", "month", "day", "daily_sunspots_number"]]
28+
data["day"] = (
29+
data["year"].astype(str) + "-" +
30+
data["month"].apply(lambda x: f"{x:02}") + "-" +
31+
data["day"].apply( lambda x: f"{x:02}")
32+
)
33+
data = data[["day", "daily_sunspots_number"]]
34+
data["daily_sunspots_number"] = np.where(
35+
data["daily_sunspots_number"] == -1,
36+
np.nan,
37+
data["daily_sunspots_number"]
38+
)
39+
return data
40+
41+
42+
if __name__ == "__main__":
    from pathlib import Path

    # Refresh the packaged snapshot: fetch the series and store it as
    # ../data/sunspots.parquet relative to this file.
    target = Path(__file__).parent.parent / "data" / "sunspots.parquet"
    sunspots = DownloadSunspotsExtractor().extract()
    sunspots.to_parquet(target)
    print("halt")

0 commit comments

Comments
 (0)