|
| 1 | +import logging |
| 2 | +from typing import Final |
| 3 | + |
| 4 | +import numpy as np |
| 5 | +import pandas as pd |
| 6 | + |
| 7 | +logger = logging.getLogger(__name__) |
| 8 | + |
| 9 | + |
class DownloadSunspotsExtractor:
    """Extract the daily total sunspot number series from the SILSO CSV feed.

    The feed is a header-less, semicolon-separated file; ``extract`` names its
    columns, keeps only the calendar date and the daily sunspot number, and
    returns them as a two-column DataFrame.
    """

    _DAILY_TOTAL_SUNSPOT_NUMBER_URL: Final[str] = "https://www.sidc.be/SILSO/INFO/sndtotcsv.php"

    def extract(self) -> pd.DataFrame:
        """Download the feed and return one row per day.

        Returns:
            A DataFrame with two columns:

            * ``day`` -- the date as a ``YYYY-MM-DD`` string assembled from the
              feed's year, month and day columns.
            * ``daily_sunspots_number`` -- the sunspot number as floats, with
              ``NaN`` wherever the feed carried the ``-1`` sentinel.

        Any error raised by ``pandas.read_csv`` (unreachable source, empty or
        malformed content) propagates to the caller unchanged.
        """
        raw = pd.read_csv(
            self._DAILY_TOTAL_SUNSPOT_NUMBER_URL,
            delimiter=";",
            decimal=".",
            names=[
                "year", "month", "day", "yearfrac_date",
                "daily_sunspots_number",
                "daily_std_across_stations",
                "obs_num", "is_definitive",
            ],
            na_values=-1,
        )
        return self._to_daily_series(raw)

    @staticmethod
    def _to_daily_series(raw: pd.DataFrame) -> pd.DataFrame:
        """Reduce the parsed feed to the ``day`` / ``daily_sunspots_number`` pair."""
        # Zero-pad month and day so every date has the fixed YYYY-MM-DD shape.
        dates = (
            raw["year"].astype(str) + "-"
            + raw["month"].map("{:02}".format) + "-"
            + raw["day"].map("{:02}".format)
        )
        counts = raw["daily_sunspots_number"]
        # ``na_values=-1`` only blanks fields the parser recognises as -1;
        # this second pass turns any -1 that survived parsing into NaN.
        # NOTE(review): presumably needed for space-padded fields in the feed
        # -- confirm against the live file.
        cleaned = np.where(counts == -1, np.nan, counts)
        # Build a brand-new frame instead of assigning into a column subset of
        # ``raw``; that avoids pandas' chained-assignment warning.
        return pd.DataFrame({"day": dates, "daily_sunspots_number": cleaned})
| 40 | + |
| 41 | + |
if __name__ == "__main__":
    from pathlib import Path

    # Manual run: download the series and store it as
    # ``data/sunspots.parquet`` one level above this module's directory.
    output_path = Path(__file__).parent.parent / "data" / "sunspots.parquet"
    # Make sure the target directory exists so a fresh checkout can write.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    data = DownloadSunspotsExtractor().extract()
    data.to_parquet(output_path)
    print(f"Wrote {len(data)} rows to {output_path}")
0 commit comments