From 1bc6159c3b0906ad564c464046f3ea3d55c45fcf Mon Sep 17 00:00:00 2001 From: Lev Gusiev Date: Mon, 29 Jun 2026 10:03:51 +0200 Subject: [PATCH 1/9] feat(#42): add DataSource --- src/argus/domain/internal_models.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 src/argus/domain/internal_models.py diff --git a/src/argus/domain/internal_models.py b/src/argus/domain/internal_models.py new file mode 100644 index 0000000..4c6d565 --- /dev/null +++ b/src/argus/domain/internal_models.py @@ -0,0 +1,8 @@ +from dataclasses import dataclass + + +@dataclass +class DataSource: + name: str + provider_kind: str + requires_api_key: bool = False \ No newline at end of file From c5d30dc58a430003228a94281723c34c7fffa927 Mon Sep 17 00:00:00 2001 From: Lev Gusiev Date: Mon, 29 Jun 2026 10:21:32 +0200 Subject: [PATCH 2/9] feat(#42): add two more models --- ...-data-sources => research-data-sources.md} | 0 src/argus/domain/internal_models.py | 26 ++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) rename docs/{research-data-sources => research-data-sources.md} (100%) diff --git a/docs/research-data-sources b/docs/research-data-sources.md similarity index 100% rename from docs/research-data-sources rename to docs/research-data-sources.md diff --git a/src/argus/domain/internal_models.py b/src/argus/domain/internal_models.py index 4c6d565..a8a3018 100644 --- a/src/argus/domain/internal_models.py +++ b/src/argus/domain/internal_models.py @@ -1,8 +1,28 @@ from dataclasses import dataclass - - +from datetime import datetime,date @dataclass class DataSource: name: str provider_kind: str - requires_api_key: bool = False \ No newline at end of file + requires_api_key: bool = False + +@dataclass +class Instruments: + symbol: str + name: str + assetclass: str + currency: str + exchange: str + base_currency: str + quote_currency: str + +@dataclass +class PriveBars: + timestamp: date + timeframe: str + open: float + high: float + low: float + close: float + adjusted_close: float + volume: float \ No newline at end of file From d53e56cc7081336cf1b072726b853ec7a31f5f78 Mon Sep 17 00:00:00 2001 From: Lev Gusiev Date: Mon, 29 Jun 2026 10:52:54 +0200 Subject: [PATCH 3/9] docs(#42): update research --- docs/research-databases-and-storage.md | 50 ++++++++++++++------------ src/argus/domain/internal_models.py | 32 ++++++++++------- 2 files changed, 47 insertions(+), 35 deletions(-) diff --git a/docs/research-databases-and-storage.md b/docs/research-databases-and-storage.md index 2061a29..217ee46 100644 --- a/docs/research-databases-and-storage.md +++ b/docs/research-databases-and-storage.md @@ -203,7 +203,7 @@ ARGUS should not use a narrow `date | value` table as the main market-data model That would work for simple exchange rates, but it would become limiting once ARGUS adds stocks, ETFs, indices or broader market APIs. -The first model should focus on three tables: +The first model should focus on three related entities: ```text data_sources @@ -211,11 +211,16 @@ instruments price_bars ``` +> [!NOTE] +> The fields below describe the future database-oriented structure. +> Technical fields such as `id`, `instrument_id`, `source_id`, `created_at` and `updated_at` are expected to appear in the database layer. +> Internal Python models may reference related objects directly, for example `source` and `instrument`, before database IDs exist. + ### data_sources Stores where data came from. -Recommended first fields: +Recommended first database fields: ```text id @@ -226,13 +231,13 @@ created_at updated_at ``` -Example: +Example internal/source records: -| name | provider_kind | requires_api_key | -|---|---|---:| -| Frankfurter | fx_rates | false | -| yfinance | market_prices | false | -| FRED | macro_data | true | +| name | provider_kind | requires_api_key | +| ---------------- | ------------- | ---------------: | +| ExchangeRate API | fx_rates | true | +| yfinance | market_prices | false | +| FRED | macro_data | true | ### instruments @@ -246,7 +251,7 @@ Examples: * S&P 500 * BTC-USD -Recommended first fields: +Recommended first database fields: ```text id @@ -261,19 +266,19 @@ created_at updated_at ``` -Example: +Example instrument records: -| symbol | name | asset_class | currency | exchange | base_currency | quote_currency | -|---|---|---|---|---|---|---| -| EUR/USD | Euro / US Dollar | fx | null | null | EUR | USD | -| AAPL | Apple Inc. | stock | USD | NASDAQ | null | null | -| SPY | SPDR S&P 500 ETF | etf | USD | NYSE Arca | null | null | +| symbol | name | asset_class | currency | exchange | base_currency | quote_currency | +| ------- | ---------------- | ----------- | -------- | --------- | ------------- | -------------- | +| EUR/USD | Euro / US Dollar | fx | null | null | EUR | USD | +| AAPL | Apple Inc. | stock | USD | NASDAQ | null | null | +| SPY | SPDR S&P 500 ETF | etf | USD | NYSE Arca | null | null | ### price_bars Stores historical market data in an OHLCV-ready structure. -Recommended first fields: +Recommended first database fields: ```text id @@ -291,19 +296,20 @@ created_at updated_at ``` -For Frankfurter, the exchange rate can be stored in `close`. +FX-style exchange-rate data can be represented as a price bar by storing the rate in `close`. The other OHLCV fields can stay empty until ARGUS uses data sources that provide them. -Example: +Example price bar records shown with joined source and instrument information for readability: -| symbol | timestamp | timeframe | open | high | low | close | adjusted_close | volume | -|---|---|---|---:|---:|---:|---:|---:|---:| -| EUR/USD | 2024-01-02 | 1d | null | null | null | 1.095 | null | null | -| AAPL | 2024-01-02 | 1d | 187.15 | 188.44 | 183.89 | 185.64 | 184.25 | 50200000 | +| source | instrument | timestamp | timeframe | open | high | low | close | adjusted_close | volume | +| -------- | ---------- | ---------- | --------- | -----: | -----: | -----: | -----: | -------------: | -------: | +| yfinance | EUR/USD | 2024-01-02 | 1d | null | null | null | 1.095 | null | null | +| yfinance | AAPL | 2024-01-02 | 1d | 187.15 | 188.44 | 183.89 | 185.64 | 184.25 | 50200000 | --- + ## Recommended First Implementation Step The first storage implementation should not be tied to one specific data provider. diff --git a/src/argus/domain/internal_models.py b/src/argus/domain/internal_models.py index a8a3018..16e9755 100644 --- a/src/argus/domain/internal_models.py +++ b/src/argus/domain/internal_models.py @@ -1,28 +1,34 @@ from dataclasses import dataclass -from datetime import datetime,date +from datetime import datetime, date + + @dataclass class DataSource: name: str provider_kind: str requires_api_key: bool = False + @dataclass -class Instruments: +class Instrument: symbol: str name: str - assetclass: str - currency: str - exchange: str - base_currency: str - quote_currency: str + asset_class: str + currency: str | None = None + exchange: str | None = None + base_currency: str | None = None + quote_currency: str | None = None + @dataclass -class PriveBars: +class PriceBar: + source: DataSource + instrument: Instrument timestamp: date timeframe: str - open: float - high: float - low: float close: float - adjusted_close: float - volume: float \ No newline at end of file + open: float | None = None + high: float | None = None + low: float | None = None + adjusted_close: float | None = None + volume: float | None = None From 1fcf1a6f8f52fdccef9150c14ffb6c022476bec0 Mon Sep 17 00:00:00 2001 From: Lev Gusiev Date: Mon, 29 Jun 2026 10:57:18 +0200 Subject: [PATCH 4/9] docs(#42): fix markdown and etc --- README.md | 2 +- docs/forecast_research.md | 12 ++++++++++++ docs/research-databases-and-storage.md | 5 ++--- src/argus/domain/internal_models.py | 2 +- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 213fe1d..1c0d156 100644 --- a/README.md +++ b/README.md @@ -357,4 +357,4 @@ Current focus: - add stronger market metrics - expand pandas-based analytics workflows - improve dashboard usefulness without adding unnecessary chart noise -- document metric definitions, assumptions and data-source behavior \ No newline at end of file +- document metric definitions, assumptions and data-source behavior diff --git a/docs/forecast_research.md b/docs/forecast_research.md index 5db8c1b..b0cc9b7 100644 --- a/docs/forecast_research.md +++ b/docs/forecast_research.md @@ -1,30 +1,40 @@ # Research: First Forecasting Approach for Market Time Series ## 1. Realistic First Prediction Task for ARGUS + A realistic first prediction task for ARGUS is **next-day exchange-rate movement** or **trend direction**. Predicting the exact next value (point forecast) is generally much harder and often less useful for trading/signal workflows than predicting the direction of the movement (up/down). A directional classification task serves as a simple, actionable signal for basic workflows. ## 2. Baseline Methods to Implement First + Before jumping into complex models, the following baselines should be implemented to evaluate the added value of any machine learning model: + - **Naive last-value forecast**: The prediction for the next period is exactly the value from the current period. This is surprisingly hard to beat in random walk-like financial time series. - **Moving average forecast**: A simple rolling average to predict the next value or determine trend direction. - **Simple linear regression**: To capture basic linear trends over a given historical window. ## 3. Libraries: NumPy, pandas, or scikit-learn? + The first implementation should use **pandas** and **scikit-learn**: + - **pandas**: Excellent for time-series manipulation, rolling windows, lagging features, and handling missing data. - **scikit-learn**: Offers robust implementations of simple models (e.g., Linear Regression, Logistic Regression for direction) and provides standardized metrics and cross-validation tools designed for time series (e.g., `TimeSeriesSplit`). ## 4. Evaluation Metrics + For the initial approaches, we should focus on: + - **Directional accuracy**: The percentage of times the model correctly predicts the direction of the price movement (up vs down). This is often more relevant than magnitude errors. - **MAE (Mean Absolute Error)**: If point forecasting is used, MAE is more robust to outliers than RMSE and provides a linear penalty for errors. - **RMSE (Root Mean Squared Error)**: Useful to penalize larger errors more heavily, but should be secondary to directional accuracy for basic signal generation. ## 5. Why is LSTM not the first implementation step? + LSTMs are highly complex, require a large amount of well-structured data to train effectively without overfitting, and are notoriously difficult to tune. For financial time series, which suffer from low signal-to-noise ratios, an LSTM is likely to overfit the training data or collapse to predicting the last known value. Starting with an LSTM obscures whether the underlying data has any predictive power and sets a high barrier for debugging and infrastructure. ## 6. Prerequisites for an LSTM Ticket + Before considering LSTMs or other deep learning approaches, the following must be established: + - A reliable data ingestion and preprocessing pipeline. - Established baseline performance metrics (e.g., a naive model and a linear regression model) to compare against. - Sufficient historical data size. @@ -32,7 +42,9 @@ Before considering LSTMs or other deep learning approaches, the following must b - Hardware/infrastructure to support longer training times and hyperparameter tuning. ## 7. Recommended First Implementation Approach + **Recommendation**: Start with **directional trend prediction** (predicting whether the next value is higher or lower than the current value) using a simple **Logistic Regression** model via **scikit-learn**. + - Use **pandas** to create basic lagged features (e.g., previous returns, moving averages). - Evaluate using **directional accuracy**. - Compare performance strictly against a **naive momentum** (predicting the trend continues) or **majority-class** baseline. diff --git a/docs/research-databases-and-storage.md b/docs/research-databases-and-storage.md index 217ee46..484cd61 100644 --- a/docs/research-databases-and-storage.md +++ b/docs/research-databases-and-storage.md @@ -110,7 +110,7 @@ PostgreSQL should be introduced later when ARGUS moves toward a server-based or ## Local, Server and Cloud Options | Option | Meaning | Fit Now | Fit Later | -|---|---|---:|---:| +| --- | --- | ---: | ---: | | Local storage | Database runs locally inside or next to the project | High | High | | Server database | Database runs as a separate service, for example PostgreSQL | Medium | High | | Cloud storage/database | Managed storage or database in the cloud | Low | High | @@ -309,7 +309,6 @@ Example price bar records shown with joined source and instrument information fo --- - ## Recommended First Implementation Step The first storage implementation should not be tied to one specific data provider. @@ -338,7 +337,7 @@ Later sprints can expand the storage layer step by step. Possible later additions: | Future Area | Possible Additions | -|---|---| +| --- | --- | | Better source mapping | source-specific symbols, provider metadata | | Watchlists | user-selected instruments | | Reports | generated report metadata and history | diff --git a/src/argus/domain/internal_models.py b/src/argus/domain/internal_models.py index 16e9755..3b7630e 100644 --- a/src/argus/domain/internal_models.py +++ b/src/argus/domain/internal_models.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from datetime import datetime, date +from datetime import date @dataclass From cef29404dd3a5d7a087a1528e256c6ad6e3eaa6f Mon Sep 17 00:00:00 2001 From: Lev Gusiev Date: Mon, 29 Jun 2026 11:53:42 +0200 Subject: [PATCH 5/9] test(#42): tests for models --- src/argus/domain/internal_models.py | 2 +- tests/test_internal_models.py | 60 +++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 tests/test_internal_models.py diff --git a/src/argus/domain/internal_models.py b/src/argus/domain/internal_models.py index 3b7630e..c10eaaa 100644 --- a/src/argus/domain/internal_models.py +++ b/src/argus/domain/internal_models.py @@ -31,4 +31,4 @@ class PriceBar: high: float | None = None low: float | None = None adjusted_close: float | None = None - volume: float | None = None + volume: float | None = None \ No newline at end of file diff --git a/tests/test_internal_models.py b/tests/test_internal_models.py new file mode 100644 index 0000000..d99afd5 --- /dev/null +++ b/tests/test_internal_models.py @@ -0,0 +1,60 @@ +from argus.domain.internal_models import DataSource,Instrument,PriceBar +from datetime import date + +def test_data_source_can_be_created() -> None: + source = DataSource( + name="yfinance", + provider_kind="fx_rates", + ) + + assert source.name == "yfinance" + assert source.provider_kind == "fx_rates" + assert source.requires_api_key is False + +def test_instrument_can_be_created() -> None: + instrument = Instrument( + symbol="EUR/USD", + name="Euro / US Dollar", + asset_class="fx", + base_currency="EUR", + quote_currency="USD" + ) + + assert instrument.symbol == "EUR/USD" + assert instrument.name == "Euro / US Dollar" + assert instrument.asset_class == "fx" + assert instrument.base_currency == "EUR" + assert instrument.quote_currency == "USD" + assert instrument.currency is None + assert instrument.exchange is None + +def test_price_bar_can_be_created() -> None: + source = DataSource( + name="yfinance", + provider_kind="fx_rates", + ) + instrument_rate = Instrument( + symbol="EUR/USD", + name="Euro / US Dollar", + asset_class="fx", + base_currency="EUR", + quote_currency="USD" + ) + pricebar = PriceBar( + source=source, + instrument=instrument_rate, + timestamp=date(2026, 1, 1), + timeframe="1D", + close=1.89 + ) + + assert pricebar.source == "yfinance" + assert pricebar.instrument == "fx_rates" + assert pricebar.timestamp == date(2026, 1, 1) + assert pricebar.timeframe == "1D" + assert pricebar.close == 1.89 + assert pricebar.open is None + assert pricebar.high is None + assert pricebar.low is None + assert pricebar.adjusted_close is None + assert pricebar.volume is None \ No newline at end of file From 70695b67c6bf47839d1dbf1e1ddf95118abc74e6 Mon Sep 17 00:00:00 2001 From: Lev Gusiev Date: Mon, 29 Jun 2026 11:57:06 +0200 Subject: [PATCH 6/9] test(#42): fix tests --- tests/test_internal_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_internal_models.py b/tests/test_internal_models.py index d99afd5..e1e8b40 100644 --- a/tests/test_internal_models.py +++ b/tests/test_internal_models.py @@ -48,8 +48,8 @@ def test_price_bar_can_be_created() -> None: close=1.89 ) - assert pricebar.source == "yfinance" - assert pricebar.instrument == "fx_rates" + assert pricebar.source == source + assert pricebar.instrument == instrument_rate assert pricebar.timestamp == date(2026, 1, 1) assert pricebar.timeframe == "1D" assert pricebar.close == 1.89 From a0a503f55dd348dbf8e4924401790e1068f43cc8 Mon Sep 17 00:00:00 2001 From: Lev Gusiev Date: Mon, 29 Jun 2026 12:04:51 +0200 Subject: [PATCH 7/9] test(#42): a test for OHCLV --- src/argus/domain/internal_models.py | 2 +- tests/test_internal_models.py | 51 ++++++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/src/argus/domain/internal_models.py b/src/argus/domain/internal_models.py index c10eaaa..3b7630e 100644 --- a/src/argus/domain/internal_models.py +++ b/src/argus/domain/internal_models.py @@ -31,4 +31,4 @@ class PriceBar: high: float | None = None low: float | None = None adjusted_close: float | None = None - volume: float | None = None \ No newline at end of file + volume: float | None = None diff --git a/tests/test_internal_models.py b/tests/test_internal_models.py index e1e8b40..2a032db 100644 --- a/tests/test_internal_models.py +++ b/tests/test_internal_models.py @@ -1,6 +1,7 @@ -from argus.domain.internal_models import DataSource,Instrument,PriceBar +from argus.domain.internal_models import DataSource, Instrument, PriceBar from datetime import date + def test_data_source_can_be_created() -> None: source = DataSource( name="yfinance", @@ -11,13 +12,14 @@ def test_data_source_can_be_created() -> None: assert source.provider_kind == "fx_rates" assert source.requires_api_key is False + def test_instrument_can_be_created() -> None: instrument = Instrument( symbol="EUR/USD", name="Euro / US Dollar", asset_class="fx", base_currency="EUR", - quote_currency="USD" + quote_currency="USD", ) assert instrument.symbol == "EUR/USD" @@ -28,24 +30,27 @@ def test_instrument_can_be_created() -> None: assert instrument.currency is None assert instrument.exchange is None + def test_price_bar_can_be_created() -> None: source = DataSource( name="yfinance", provider_kind="fx_rates", ) + instrument_rate = Instrument( symbol="EUR/USD", name="Euro / US Dollar", asset_class="fx", base_currency="EUR", - quote_currency="USD" + quote_currency="USD", ) + pricebar = PriceBar( source=source, instrument=instrument_rate, timestamp=date(2026, 1, 1), timeframe="1D", - close=1.89 + close=1.89, ) assert pricebar.source == source @@ -57,4 +62,40 @@ def test_price_bar_can_be_created() -> None: assert pricebar.high is None assert pricebar.low is None assert pricebar.adjusted_close is None - assert pricebar.volume is None \ No newline at end of file + assert pricebar.volume is None + + +def test_stock_ohlcv_data_can_be_represented_as_price_bar() -> None: + source = DataSource( + name="yfinance", + provider_kind="market_prices", + ) + + instrument = Instrument( + symbol="AAPL", + name="Apple Inc.", + asset_class="stock", + currency="USD", + exchange="NASDAQ", + ) + + price_bar = PriceBar( + source=source, + instrument=instrument, + timestamp=date(2026, 1, 1), + timeframe="1d", + open=187.15, + high=188.44, + low=183.89, + close=185.64, + adjusted_close=184.25, + volume=50_200_000, + ) + + assert price_bar.instrument.symbol == "AAPL" + assert price_bar.open == 187.15 + assert price_bar.high == 188.44 + assert price_bar.low == 183.89 + assert price_bar.close == 185.64 + assert price_bar.adjusted_close == 184.25 + assert price_bar.volume == 50_200_000 From 0e86e91631bd83283c8021d4e09ec51ea3890595 Mon Sep 17 00:00:00 2001 From: Lev Gusiev Date: Mon, 29 Jun 2026 12:10:46 +0200 Subject: [PATCH 8/9] style(#42): edit test names --- tests/test_internal_models.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/test_internal_models.py b/tests/test_internal_models.py index 2a032db..ae477fc 100644 --- a/tests/test_internal_models.py +++ b/tests/test_internal_models.py @@ -31,7 +31,7 @@ def test_instrument_can_be_created() -> None: assert instrument.exchange is None -def test_price_bar_can_be_created() -> None: +def test_rate_bar_can_be_created() -> None: source = DataSource( name="yfinance", provider_kind="fx_rates", @@ -45,27 +45,27 @@ def test_price_bar_can_be_created() -> None: quote_currency="USD", ) - pricebar = PriceBar( + price_bar = PriceBar( source=source, instrument=instrument_rate, timestamp=date(2026, 1, 1), - timeframe="1D", + timeframe="1d", close=1.89, ) - assert pricebar.source == source - assert pricebar.instrument == instrument_rate - assert pricebar.timestamp == date(2026, 1, 1) - assert pricebar.timeframe == "1D" - assert pricebar.close == 1.89 - assert pricebar.open is None - assert pricebar.high is None - assert pricebar.low is None - assert pricebar.adjusted_close is None - assert pricebar.volume is None + assert price_bar.source == source + assert price_bar.instrument == instrument_rate + assert price_bar.timestamp == date(2026, 1, 1) + assert price_bar.timeframe == "1D" + assert price_bar.close == 1.89 + assert price_bar.open is None + assert price_bar.high is None + assert price_bar.low is None + assert price_bar.adjusted_close is None + assert price_bar.volume is None -def test_stock_ohlcv_data_can_be_represented_as_price_bar() -> None: +def test_stock_ohlcv_bar_can_be_created() -> None: source = DataSource( name="yfinance", provider_kind="market_prices", From 9f342e707b5efed73c2859ff2672d4137bcbbb74 Mon Sep 17 00:00:00 2001 From: Lev Gusiev Date: Mon, 29 Jun 2026 12:12:41 +0200 Subject: [PATCH 9/9] test(#42): fix a param --- tests/test_internal_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_internal_models.py b/tests/test_internal_models.py index ae477fc..97df4c6 100644 --- a/tests/test_internal_models.py +++ b/tests/test_internal_models.py @@ -56,7 +56,7 @@ def test_rate_bar_can_be_created() -> None: assert price_bar.source == source assert price_bar.instrument == instrument_rate assert price_bar.timestamp == date(2026, 1, 1) - assert price_bar.timeframe == "1D" + assert price_bar.timeframe == "1d" assert price_bar.close == 1.89 assert price_bar.open is None assert price_bar.high is None