diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..8c24b79 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,25 @@ +# Git +.git +.github +.githooks +.gitignore + +# Local environment and secrets +.venv +.env + +# Python cache and test cache +__pycache__ +.pytest_cache +.ruff_cache +.mypy_cache +.coverage +htmlcov + +# Build artifacts +dist +build +*.egg-info + +# Project docs not needed for the test image +docs \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0b53865..3f87759 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,33 +4,9 @@ Thank you for your interest in contributing to ARGUS. ARGUS is a Python-based market analytics project focused on clean data workflows, reliable code, useful metrics and future AI-assisted monitoring. -This project is still growing, so contributions should help the project become more stable, understandable and useful step by step. +The project is still growing, so contributions should be small, focused and easy to review. You do not need to be an expert to contribute, but your changes should be understandable, reliable and related to the current project direction. -> [!IMPORTANT] -> ARGUS values reliability, clear communication and long-term skill building. -> Contributions should improve the project without creating unnecessary complexity. - ---- - -## Project Mindset - -ARGUS is not only about adding features quickly. - -The project is built around: - -- clean Python code -- understandable architecture -- reliable tests -- useful documentation -- careful data handling -- practical analytics -- continuous learning - -Good contributions should make the project easier to use, test, maintain or extend. - ---- - -## What You Can Contribute +Good starting points are issues labeled `good first issue`. These issues are usually smaller, easier to review and better suited for getting familiar with the project. 
Helpful contributions include: @@ -38,17 +14,14 @@ Helpful contributions include: - tests - documentation improvements - small refactorings -- validation improvements - analytics metrics -- chart improvements - data-source clients -- CI/CD improvements -- issue clarification -- architecture notes -- examples and usage instructions +- UI or chart improvements +- CI/CD and tooling improvements +- architecture or research notes -> [!NOTE] -> Large features should usually start with an issue or short discussion before implementation. +> [!IMPORTANT] +> Please keep changes focused and avoid adding unnecessary complexity. --- @@ -76,46 +49,45 @@ Bad examples: --- -## Development Setup +## Contribution Expectations -Clone the repository: +Contributors are expected to keep changes focused, understandable and related to the issue or task. -```bash -git clone https://github.com/BytecodeBrewer/argus.git -cd argus -``` +Please: -Create a virtual environment: +- explain your changes clearly +- be open to review feedback +- improve your contribution step by step after feedback +- avoid unrelated rewrites +- respect the existing architecture unless there is a clear reason to change it +- do not add scripts that automatically run `git add`, `git commit`, `git push` or create pull requests unless this was discussed first -```bash -python -m venv .venv -``` +A contribution may be declined or delayed if it: -Activate it. +- does not fit the current roadmap +- adds too much complexity too early +- breaks existing functionality +- lacks necessary checks or documentation +- duplicates existing work +- bypasses the repository workflow -On Windows PowerShell: +--- -```powershell -.venv\Scripts\Activate.ps1 -``` +## Branch Workflow -On macOS/Linux: +For issue-based work, create your branch from the related GitHub issue when possible. -```bash -source .venv/bin/activate -``` +GitHub may suggest a branch name based on the issue title. You can shorten it if the generated name is too long. 
-Install the project with development dependencies: +Good branch names are focused and describe the task: -```bash -pip install -e ".[dev]" +```text +43-research-forecasting-approach +33-add-yfinance-client +40-improve-test-coverage ``` ---- - -## Branch Workflow - -Create a new branch for your work: +If you create the branch manually, use: ```bash git checkout -b @@ -124,24 +96,30 @@ git checkout -b Example: ```bash -git checkout -b 12-add-volatility-metric +git checkout -b 43-research-forecasting-approach ``` -Use focused branch names that describe the work. - --- ## Commit Expectations Commits should be small, understandable and related to the current task. +ARGUS uses a conventional commit style with an issue reference: + +```text +type(#issue): short description +``` + Good commit messages: ```text -Add rolling volatility metric -Fix currency validation edge case -Update README setup instructions -Add tests for trend metrics +docs(#43): research first forecasting approach +feat(#33): add yfinance historical data client +test(#40): add tests for conversion service +fix(#33): handle empty historical data response +refactor(#34): split metric helpers +ci(#10): update commit message workflow ``` Avoid unclear messages: @@ -155,21 +133,25 @@ final ``` > [!TIP] -> A good commit tells future readers what changed and why it belongs to the task. +> A good commit tells future readers what changed and which issue it belongs to. --- -## Testing +## Checks -Before opening a pull request, run the test suite: +Before opening a pull request, run the project checks: ```bash pytest +ruff check . +ruff format --check . ``` -A pull request should not be opened as ready for review if tests are failing without explanation. +These checks verify that tests pass, code style is valid and formatting is consistent. + +A pull request should not be marked as ready for review if checks are failing without explanation. 
-If a test fails and you do not know why, mention it clearly in the pull request. +If a check fails and you are unsure why, mention it clearly in the pull request. > [!IMPORTANT] > CI checks must pass before a pull request can be merged. @@ -178,65 +160,23 @@ If a test fails and you do not know why, mention it clearly in the pull request. ## Pull Request Expectations -A good pull request should include: +Pull requests should target `develop` unless the maintainer explicitly says otherwise. -- a clear title -- a short explanation of what changed -- a link to the related issue if available -- notes about tests -- screenshots for UI changes if useful -- a short explanation of any trade-offs +Do not open feature, research or documentation pull requests directly against `main`. +The `main` branch is reserved for stable/release-ready changes. -Pull requests should be focused and reviewable. +Please use the pull request template and fill it out clearly. -Before opening a pull request, run: +The template helps reviewers understand: -```bash -pytest -ruff check . -ruff format --check . -``` - ---- - -## Reliability Expectations - -Contributors are expected to work reliably. +- what changed +- which issue is related +- whether tests were run +- whether documentation or screenshots are needed +- if there are any notes or trade-offs -This means: - -- do not submit random or unfinished code without context -- do not ignore failing tests -- do not introduce secrets, API keys or local machine paths -- do not rewrite unrelated parts of the project without discussion -- communicate if you are unsure -- keep changes understandable for future contributors -- respect the existing architecture unless there is a clear reason to change it - -Reliability does not mean knowing everything already. - -It means being honest, careful and consistent. - ---- - -## Learning Mindset - -ARGUS welcomes contributors who want to improve their technical skills. 
- -You do not need to be an expert to contribute. - -Helpful behavior includes: - -- asking clear questions -- explaining your reasoning -- being open to review feedback -- improving your code after feedback -- learning from tests, errors and architecture discussions -- documenting what you learned when it helps others - -> [!NOTE] -> This project values skill growth. -> A thoughtful small contribution is better than a large unclear one. +Do not bypass the pull request template or replace it with an unrelated auto-generated description. +It makes reviewing harder and may delay the merge. --- @@ -265,25 +205,14 @@ For analytics code: ## Secrets and API Keys -Never commit secrets. - -Do not commit: +Never commit secrets, API keys, tokens, passwords, `.env` files or local config files with private data. -- API keys -- tokens -- passwords -- `.env` files -- local config files with private data - -Use a local `.env` file for secrets. +Use a local `.env` file for secrets: ```env -api_key=your_api_key_here +EXCHANGE_RATE_API_KEY=your_api_key_here ``` -> [!WARNING] -> If you accidentally commit a secret, revoke it immediately and inform the maintainer. - --- ## Documentation @@ -299,44 +228,4 @@ Useful documentation includes: - data-source assumptions - troubleshooting notes -Repository-level files such as `README.md`, `CONTRIBUTING.md`, `CODE_OF_CONDUCT.md` and `LICENSE` belong in the repository root. - Technical notes, research and deeper explanations belong in `docs/`. - ---- - -## Communication - -Please communicate respectfully and constructively. - -When giving feedback: - -- focus on the code or idea, not the person -- explain the reason behind suggestions -- be specific -- stay open to alternatives - -When receiving feedback: - -- assume good intent -- ask questions if something is unclear -- improve the contribution step by step - -All contributors are expected to follow the project’s Code of Conduct. 
- ---- - -## Maintainer Notes - -The maintainer may ask for changes before merging a pull request. - -A contribution may be declined if it: - -- does not fit the current roadmap -- adds too much complexity too early -- breaks existing functionality -- lacks necessary tests -- duplicates existing work -- does not follow the project’s quality expectations - -This helps keep ARGUS stable, learnable and maintainable. \ No newline at end of file diff --git a/README.md b/README.md index 607f0d0..8723389 100644 --- a/README.md +++ b/README.md @@ -120,11 +120,18 @@ README.md - Tkinter - pytest -### Current data source +### Current data sources - ExchangeRate API for live currency conversion - yfinance for historical market-data retrieval and analytics +### Storage + +- DuckDB — local analytical storage for normalized historical market data + +> [!NOTE] +> See docs/storage.md for details. + --- ## Planned / Future Tech Stack @@ -138,46 +145,32 @@ Planned or likely future technologies include: - Frankfurter API for historical FX data - possible additional market-data APIs later -### Data processing - -- pandas -- NumPy -- possibly Polars later for larger datasets - ### Storage - PostgreSQL -- DuckDB -- Parquet -- optional cloud storage ### Visualization and UI -- matplotlib -- Plotly - NiceGUI +- Django ### DevOps and deployment -- GitHub Actions -- Docker - Docker Compose -- cloud deployment later +- Travis CI ### Cloud and data engineering -- Azure, GCP or AWS depending on project direction +- Azure - scheduled ingestion -- data quality checks -- reporting pipelines +- agentic workflows +- Blob Storage +- scaled analysis ### AI and agentic workflows - LLM-assisted summaries - RAG over stored reports or notes -- agentic data checks -- anomaly monitoring -- human-in-the-loop signal review > [!CAUTION] > AI and agentic features are future-stage ideas.
@@ -199,6 +192,7 @@ Recommended for development: - VS Code - a virtual environment - pytest +- Docker, if you want to run tests in an isolated container environment > [!NOTE] > Runtime dependencies are managed through `pyproject.toml`. @@ -254,7 +248,7 @@ pip install -e ".[dev]" ## API Key Setup -ARGUS currently uses the ExchangeRate API for live currency conversion. +ARGUS uses the ExchangeRate API for live currency conversion. Historical analytics currently use yfinance and do not require an additional API key. ### 1. Create an API key @@ -284,7 +278,7 @@ The `.env` file must stay local and should never be committed. --- -## Running ARGUS +## Running ARGUS Locally Start the current Tkinter GUI: @@ -294,6 +288,22 @@ python -m argus.main This starts the local ARGUS prototype with calculator, currency conversion and basic analytics views. +## Running ARGUS in Docker + +ARGUS includes a minimal Docker setup for running the test suite in an isolated container environment. + +Build the Docker image: + +```bash +docker build -t argus .
+``` + +Run the test suite in a container: + +```bash +docker run --rm argus +``` + ### Legacy CLI / Debug Interface The legacy CLI is still available for quick local checks and debugging: @@ -310,7 +320,7 @@ python src/legacy/debug_main.py ## Running Tests -Run the test suite: +Run the test suite locally: ```bash pytest @@ -346,4 +356,4 @@ Current focus: - add stronger market metrics - expand pandas-based analytics workflows - improve dashboard usefulness without adding unnecessary chart noise -- document metric definitions, assumptions and data-source behavior \ No newline at end of file +- document metric definitions, assumptions and data-source behavior diff --git a/argus_probe.duckdb b/argus_probe.duckdb new file mode 100644 index 0000000..9c9ef34 Binary files /dev/null and b/argus_probe.duckdb differ diff --git a/dockerfile b/dockerfile new file mode 100644 index 0000000..1dce2e8 --- /dev/null +++ b/dockerfile @@ -0,0 +1,12 @@ +FROM python:3.11-slim + +WORKDIR /app + +COPY pyproject.toml README.md ./ +COPY src/ ./src/ +COPY tests/ ./tests/ + +RUN python -m pip install --upgrade pip \ + && pip install -e ".[dev]" + +CMD ["pytest"] \ No newline at end of file diff --git a/docs/forecast_research.md b/docs/forecast_research.md new file mode 100644 index 0000000..b0cc9b7 --- /dev/null +++ b/docs/forecast_research.md @@ -0,0 +1,50 @@ +# Research: First Forecasting Approach for Market Time Series + +## 1. Realistic First Prediction Task for ARGUS + +A realistic first prediction task for ARGUS is **next-day exchange-rate movement** or **trend direction**. Predicting the exact next value (point forecast) is generally much harder and often less useful for trading/signal workflows than predicting the direction of the movement (up/down). A directional classification task serves as a simple, actionable signal for basic workflows. + +## 2.
Baseline Methods to Implement First + +Before jumping into complex models, the following baselines should be implemented to evaluate the added value of any machine learning model: + +- **Naive last-value forecast**: The prediction for the next period is exactly the value from the current period. This is surprisingly hard to beat in random walk-like financial time series. +- **Moving average forecast**: A simple rolling average to predict the next value or determine trend direction. +- **Simple linear regression**: To capture basic linear trends over a given historical window. + +## 3. Libraries: NumPy, pandas, or scikit-learn? + +The first implementation should use **pandas** and **scikit-learn**: + +- **pandas**: Excellent for time-series manipulation, rolling windows, lagging features, and handling missing data. +- **scikit-learn**: Offers robust implementations of simple models (e.g., Linear Regression, Logistic Regression for direction) and provides standardized metrics and cross-validation tools designed for time series (e.g., `TimeSeriesSplit`). + +## 4. Evaluation Metrics + +For the initial approaches, we should focus on: + +- **Directional accuracy**: The percentage of times the model correctly predicts the direction of the price movement (up vs down). This is often more relevant than magnitude errors. +- **MAE (Mean Absolute Error)**: If point forecasting is used, MAE is more robust to outliers than RMSE and provides a linear penalty for errors. +- **RMSE (Root Mean Squared Error)**: Useful to penalize larger errors more heavily, but should be secondary to directional accuracy for basic signal generation. + +## 5. Why is LSTM not the first implementation step? + +LSTMs are highly complex, require a large amount of well-structured data to train effectively without overfitting, and are notoriously difficult to tune. 
For financial time series, which suffer from low signal-to-noise ratios, an LSTM is likely to overfit the training data or collapse to predicting the last known value. Starting with an LSTM obscures whether the underlying data has any predictive power and sets a high barrier for debugging and infrastructure. + +## 6. Prerequisites for an LSTM Ticket + +Before considering LSTMs or other deep learning approaches, the following must be established: + +- A reliable data ingestion and preprocessing pipeline. +- Established baseline performance metrics (e.g., a naive model and a linear regression model) to compare against. +- Sufficient historical data size. +- A robust backtesting and cross-validation framework to ensure the LSTM isn't just memorizing data or overfitting. +- Hardware/infrastructure to support longer training times and hyperparameter tuning. + +## 7. Recommended First Implementation Approach + +**Recommendation**: Start with **directional trend prediction** (predicting whether the next value is higher or lower than the current value) using a simple **Logistic Regression** model via **scikit-learn**. + +- Use **pandas** to create basic lagged features (e.g., previous returns, moving averages). +- Evaluate using **directional accuracy**. +- Compare performance strictly against a **naive momentum** (predicting the trend continues) or **majority-class** baseline. diff --git a/docs/research-data-sources b/docs/research-data-sources.md similarity index 100% rename from docs/research-data-sources rename to docs/research-data-sources.md diff --git a/docs/research-databases-and-storage.md b/docs/research-databases-and-storage.md new file mode 100644 index 0000000..484cd61 --- /dev/null +++ b/docs/research-databases-and-storage.md @@ -0,0 +1,388 @@ +# ARGUS Storage Research + +## Goal + +Research what ARGUS should store and which database/storage approach fits the project. 
+ +ARGUS is moving from live API requests and in-memory analytics toward real data workflows. +The first storage decision should support local market analytics, SQL practice and future dashboard features without adding unnecessary infrastructure too early. + +--- + +## Storage Use Cases + +ARGUS should eventually store different kinds of data, but not all of them need to be implemented at once. + +Relevant storage use cases are: + +* historical exchange rates +* cleaned historical market data +* source information +* instruments that ARGUS can analyze +* later watchlists +* later generated reports +* later macroeconomic data +* later paper-trading history + +The first implementation should focus on historical market data and the basic entities needed to query it. + +--- + +## Storage Candidates + +ARGUS should compare storage options based on the current project phase. + +The project currently needs local analytical storage, not a full server or cloud database. + +### DuckDB + +DuckDB is a local analytical database. + +It is a strong fit for ARGUS because it supports SQL-based analytics without requiring a database server. + +Useful for: + +* historical market data +* local time-series analysis +* SQL practice +* Python-based analytics +* notebook-based exploration +* dashboard data preparation + +Limitations: + +* not a server database +* less suitable for multi-user product features later + +--- + +### SQLite + +SQLite is a simple local database. + +It is strong for small app storage and simple persistence. + +Useful for: + +* settings +* small app-state data +* simple local tables +* later watchlists +* lightweight metadata + +Limitations: + +* less analytics-focused than DuckDB +* not ideal as the main storage layer for historical market data +* better for app-state than analytical time-series queries + +--- + +### PostgreSQL + +PostgreSQL is a server-based relational database. + +It is a strong long-term option when ARGUS becomes more product-like. 
+ +Useful for: + +* server-based storage +* user-facing features +* report history +* watchlists +* paper-trading history +* richer metadata +* cloud-ready architecture +* SQLGate usage later + +Limitations: + +* more setup than needed right now +* requires server or Docker setup +* adds infrastructure complexity too early + +Fit for ARGUS: + +PostgreSQL should be introduced later when ARGUS moves toward a server-based or cloud-ready architecture. + +--- + +## Local, Server and Cloud Options + +| Option | Meaning | Fit Now | Fit Later | +| --- | --- | ---: | ---: | +| Local storage | Database runs locally inside or next to the project | High | High | +| Server database | Database runs as a separate service, for example PostgreSQL | Medium | High | +| Cloud storage/database | Managed storage or database in the cloud | Low | High | + +ARGUS should start with local storage. + +Reason: + +* simpler setup +* easier learning curve +* good fit for a Python analytics project +* no cloud or server infrastructure required yet +* enough for historical data, metrics and dashboard development + +Server and cloud storage should come later when ARGUS has stronger product features such as reports, user state, paper-trading history or deployment needs. + +--- + +## Recommended First Storage Approach + +DuckDB should be the first storage technology for ARGUS. 
+ +Reason: + +* ARGUS currently needs local analytical storage, not a full server database +* DuckDB fits historical time-series analysis well +* it supports SQL-based analytics without requiring a database server +* it works well with Python and notebook-based exploration +* it keeps the first storage implementation manageable +* it can later be replaced or complemented by PostgreSQL if ARGUS becomes more product-like + +The first storage implementation should focus on: + +* historical market data +* cleaned OHLCV-ready price data +* source information +* instruments that ARGUS can analyze + +PostgreSQL and SQLGate become more relevant later. + +For the first DuckDB phase, the goal is to build a clean local analytics workflow. + +--- + +## Developer Interaction Workflow + +ARGUS should use a practical developer workflow for DuckDB. + +The goal is to make the database easy to inspect, explore and validate before logic is moved into production code. + +### Notebook Exploration + +Notebooks should be the main exploration layer. + +They are useful for: + +* opening the DuckDB database +* testing SQL queries +* validating imported data +* comparing SQL results with pandas calculations +* exploring metric logic +* documenting research assumptions + +This workflow is especially useful before turning queries into reusable project code. + +Notebook exploration should be preferred over a GUI database tool in the first phase. + +### DuckDB CLI + +The DuckDB CLI should be used for quick database inspection. + +It is useful for: + +* checking available tables +* running small SQL queries +* validating stored records +* debugging the local database file + +The CLI is not the main research environment, but it is useful as a fast inspection tool. + +A GUI tool such as DBeaver can be tested if needed, but it should stay optional. + +--- + +## First Data Model Direction + +The first data model should support FX data now and broader market data later. 
+ +ARGUS should not use a narrow `date | value` table as the main market-data model. + +That would work for simple exchange rates, but it would become limiting once ARGUS adds stocks, ETFs, indices or broader market APIs. + +The first model should focus on three related entities: + +```text +data_sources +instruments +price_bars +``` + +> [!NOTE] +> The fields below describe the future database-oriented structure. +> Technical fields such as `id`, `instrument_id`, `source_id`, `created_at` and `updated_at` are expected to appear in the database layer. +> Internal Python models may reference related objects directly, for example `source` and `instrument`, before database IDs exist. + +### data_sources + +Stores where data came from. + +Recommended first database fields: + +```text +id +name +provider_kind +requires_api_key +created_at +updated_at +``` + +Example internal/source records: + +| name | provider_kind | requires_api_key | +| ---------------- | ------------- | ---------------: | +| ExchangeRate API | fx_rates | true | +| yfinance | market_prices | false | +| FRED | macro_data | true | + +### instruments + +Stores what ARGUS can analyze. + +Examples: + +* EUR/USD +* AAPL +* SPY +* S&P 500 +* BTC-USD + +Recommended first database fields: + +```text +id +symbol +name +asset_class +currency +exchange +base_currency +quote_currency +created_at +updated_at +``` + +Example instrument records: + +| symbol | name | asset_class | currency | exchange | base_currency | quote_currency | +| ------- | ---------------- | ----------- | -------- | --------- | ------------- | -------------- | +| EUR/USD | Euro / US Dollar | fx | null | null | EUR | USD | +| AAPL | Apple Inc. | stock | USD | NASDAQ | null | null | +| SPY | SPDR S&P 500 ETF | etf | USD | NYSE Arca | null | null | + +### price_bars + +Stores historical market data in an OHLCV-ready structure. 
+ +Recommended first database fields: + +```text +id +instrument_id +source_id +timestamp +timeframe +open +high +low +close +adjusted_close +volume +created_at +updated_at +``` + +FX-style exchange-rate data can be represented as a price bar by storing the rate in `close`. + +The other OHLCV fields can stay empty until ARGUS uses data sources that provide them. + +Example price bar records shown with joined source and instrument information for readability: + +| source | instrument | timestamp | timeframe | open | high | low | close | adjusted_close | volume | +| -------- | ---------- | ---------- | --------- | -----: | -----: | -----: | -----: | -------------: | -------: | +| yfinance | EUR/USD | 2024-01-02 | 1d | null | null | null | 1.095 | null | null | +| yfinance | AAPL | 2024-01-02 | 1d | 187.15 | 188.44 | 183.89 | 185.64 | 184.25 | 50200000 | + +--- + +## Recommended First Implementation Step + +The first storage implementation should not be tied to one specific data provider. + +ARGUS currently works with an existing ExchangeRate API client and evaluates broader market data through yfinance. +Frankfurter may be added later as a stronger FX-oriented historical data source. + +The storage layer should therefore focus on a normalized internal market-data format instead of depending on one API response structure. + +Recommended first step: + +```text +active data client +→ normalize into instruments and price_bars +→ store in DuckDB +→ query with SQL +→ use results for analytics and charts +``` + +--- + +## Future Direction + +Later sprints can expand the storage layer step by step. 
+ +Possible later additions: + +| Future Area | Possible Additions | +| --- | --- | +| Better source mapping | source-specific symbols, provider metadata | +| Watchlists | user-selected instruments | +| Reports | generated report metadata and history | +| Macro data | FRED indicators and observations | +| Paper trading | simulated orders, positions and portfolio history | +| Server architecture | PostgreSQL | +| SQL tooling | SQLGate with PostgreSQL | +| Cloud direction | managed PostgreSQL or cloud storage | + +SQLGate should be kept for a later PostgreSQL phase. + +It becomes useful when ARGUS moves toward: + +* server-based storage +* stronger database management +* richer metadata +* more stable application state +* user-facing features +* report history +* cloud-ready architecture + +Additional metadata such as documentation links, terms links or provider governance fields can also become useful later. + +For the first DuckDB phase, these details should stay in research documentation instead of the database schema. + +--- + +## Final Recommendation + +ARGUS should start with DuckDB as the first local analytics storage layer. + +DuckDB fits the current phase best because ARGUS needs local analytical SQL workflows, not a full server database yet. + +The first implementation should store historical market data in an OHLCV-ready structure. + +The recommended first data model is: + +```text +data_sources +instruments +price_bars +``` + +Notebook exploration should be the main developer workflow before SQL logic is moved into application code. + +The DuckDB CLI can be used for quick inspection. + +PostgreSQL and SQLGate should be introduced later when ARGUS moves toward a more product-like or cloud-based architecture. 
diff --git a/docs/roadmap.md b/docs/roadmap.md index 85706af..c637327 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -27,92 +27,121 @@ Scope: Outcome: Sprint 1 established the local ARGUS foundation with package structure, GUI prototype, analytics prototype, tests, documentation, CI, Dependabot and governance files. -### Sprint 2 — Market Analytics & Data Source Expansion +### Sprint 2 — Reporting & Market Analytics Foundation **Status:** In progress -Move from simple FX conversion toward broader market analytics. +Move ARGUS from a simple FX-focused prototype toward a first usable market analytics and reporting tool. -Scope: +**Scope:** + +- Add stronger market analytics metrics: -- Add stronger market metrics: - cumulative return - strongest / weakest day - rolling volatility - - performance analytics - - risk analytics -- Extend the current dashboard without adding unnecessary chart noise -- Add or evaluate new data clients: - - Frankfurter for historical FX data + - basic performance analytics + - basic risk analytics +- Add or improve real market data support: + - yfinance for broader market data -- Replace or reduce dependency on the current ExchangeRate API where needed + - existing FX conversion remains available where useful - Improve pandas-based analysis workflows -- Add tests for metric calculations and data transformations -- Document metric definitions, assumptions and chart behavior +- Introduce local storage for historical market data +- Add report generation and export +- Add a first simple prediction feature +- Introduce NiceGUI as the next GUI direction +- Extend the current dashboard with real market analytics +- Add tests for metric calculations, data transformations and storage behavior +- Improve CI/CD with first deployment or release automation steps -Outcome: -ARGUS becomes a basic market analytics tool, not only a converter. +**Outcome:** + +ARGUS becomes a basic market analytics and reporting tool. 
+Users can fetch market data, store it locally, calculate metrics, generate a first report and view results through a first modern dashboard. -### Sprint 3 — Storage, Web-Ready UI & Data Architecture +--- + +### Sprint 3 — Advanced Local Analytics & Product Quality **Status:** Planned -Prepare ARGUS for persistent data workflows and a stronger product interface. +Expand the local ARGUS application into a stronger analytics product with better data handling, UI structure, predictions and quality checks. -Scope: +**Scope:** -- Add local storage layer: - - PostgreSQL, DuckDB, SQLite or Parquet depending on use case -- Store historical market data -- Separate ingestion, transformation, analytics and presentation layers more clearly -- Start NiceGUI as the main web-ready UI direction -- Keep Tkinter as legacy/prototype unless still useful -- Keep CLI as internal/debug interface only -- Add clearer architecture documentation -- Prepare the project for larger data workflows and external contributors +- Extend the local storage layer +- Add a first local ETL workflow +- Improve the NiceGUI dashboard structure and usability +- Explore how NiceGUI can later interact with a more modern frontend stack such as Django, React or Node.js-based services +- Keep Tkinter as legacy/prototype unless it is no longer useful +- Add more metrics, instruments and prediction features +- Improve report templates and report structure +- Introduce first LLM-based summaries for generated reports +- Add first performance tests +- Introduce Snyk or another dependency/security scanning workflow +- Improve code quality, test coverage and maintainability -Outcome: -ARGUS has a clearer data architecture and starts moving from local prototype toward a scalable analytics application. +**Outcome:** -### Sprint 4 — Cloud, Pipelines & Portfolio-Grade Data Engineering +ARGUS becomes a more scalable local analytics application. 
+It can process more instruments, produce better reports, provide first automated summaries and offer more reliable insight into market data. -**Status:** Future +--- -Turn ARGUS into a stronger end-to-end data engineering project. +### Sprint 4 — Extended Analysis & Cloud-Ready Foundation -Scope: +**Status:** Planned -- Docker / Docker Compose -- Scheduled data ingestion -- Cloud storage or cloud database -- CI/CD improvements -- Data quality checks -- Basic pipeline orchestration -- Reporting layer -- Architecture diagram -- Deployment documentation +Prepare ARGUS for deeper analysis, cloud interaction and future portfolio-assistant workflows while keeping the local product usable and transparent. -Target workflow: +**Scope:** -```text -API → Ingestion → Storage → Transformation → Analysis → Visualization → CI/CD -``` +- Add Docker Compose for a more complete local development setup +- Introduce a first Azure connection, focused on simple storage or artifact exchange +- Improve the LLM workflow +- Introduce a first RAG-ready structure for reports, notes, documentation and stored analysis artifacts +- Add data quality checks +- Improve caching and efficient storage of market data +- Add more export options for users +- Add more metrics and better metadata visualization +- Improve transparency around data sources, generated reports and analysis assumptions +- Prepare clear interfaces for future cloud and assistant workflows -### Sprint 5 — AI-Assisted Research & Agentic Monitoring +**Outcome:** -**Status:** Future vision +ARGUS becomes ready to interact with a future cloud layer. +The application can produce clearer, more transparent market analysis and prepares the foundation for retrieval-based workflows, stronger automation and future ARGUS Core integration. -Add AI support only after the data, storage, service and reporting layers are stable. 
+--- -Scope: +### Sprint 5 — Cloud Interaction & Agentic Monitoring Foundation -- LLM-assisted report summaries -- Explanation of unusual movements -- RAG over stored market notes, reports or documentation -- Agentic checks for data quality, anomalies and recurring market scans -- Human-in-the-loop signal review -- Automated monitoring workflows +**Status:** Planned -Outcome: +Start the first cloud-connected ARGUS workflows and introduce the foundation for monitoring, agentic checks and strategy-support features. + +**Scope:** + +- Add first cloud workflows that extend local analysis +- Connect local ARGUS workflows with the first cloud-side services +- Extend RAG over stored market notes, reports, documentation and analysis artifacts +- Add agentic checks for: + + - data quality + - anomalies + - recurring market scans + - report consistency +- Add first human-in-the-loop review workflows for signals or strategy ideas +- Add automated monitoring workflows +- Prepare the first foundations for: + + - paper trading + - backtesting + - controlled strategy evaluation + - future portfolio-assistant workflows + +**Outcome:** -ARGUS starts behaving like its name: a system that continuously watches market data, evaluates it and helps generate useful signals. +ARGUS and the first cloud-side services begin to interact. +ARGUS becomes useful not only as an analytics and reporting tool, but also as the first foundation for monitoring, strategy evaluation and controlled market-research workflows. diff --git a/docs/storage.md b/docs/storage.md new file mode 100644 index 0000000..27ce106 --- /dev/null +++ b/docs/storage.md @@ -0,0 +1,154 @@ +# ARGUS Storage Layer + +ARGUS uses DuckDB as the local storage layer for normalized market data. + +The storage layer stores ARGUS-internal market data structures and provides reusable historical data for analytics, charts, dashboards and reports. 
+ +The storage design follows the direction described in [`docs/research-databases-and-storage.md`](research-databases-and-storage.md). + +## Storage Workflow + +ARGUS uses a storage-first workflow for historical market data. + +```text +User / GUI / Analytics request + ↓ +Market data service + ↓ +Check DuckDB storage + ↓ +If data exists: + read stored data + return it for analytics, charts or reports + +If data is missing: + fetch data from a client/API + normalize the response into ARGUS-internal data + return the normalized data + save the normalized data in DuckDB +``` + +DuckDB is used to avoid unnecessary repeated API calls and to make historical market data reusable across analytics, dashboard and reporting workflows. + +Fresh API data can be used immediately after normalization and is also persisted so future requests can use the local storage layer first. + +## Schema Overview + +The first storage schema is based on three related entities: + +```text +data_sources +instruments +price_bars +``` + +### `data_sources` + +Stores where market data came from. + +Examples: + +```text +yfinance +ExchangeRate API +Frankfurter +FRED +``` + +Each source describes a provider or API that can deliver market, FX or macro data. + +### `instruments` + +Stores what ARGUS can analyze. + +Examples: + +```text +EUR/USD +AAPL +SPY +BTC-USD +``` + +An instrument represents the internal ARGUS identity of an asset, currency pair, ETF, index or other market object. + +Provider-specific symbols should be normalized before storage. For example: + +```text +yfinance provider symbol: EURUSD=X +ARGUS instrument symbol: EUR/USD +``` + +### `price_bars` + +Stores historical time-series values in an OHLCV-ready structure. + +A price bar belongs to: + +```text +one data source +one instrument +one timestamp +one timeframe +``` + +FX rates are stored as `close` values. + +For simple FX data, the remaining OHLCV fields can stay empty. 
For broader market data, the same structure can store open, high, low, close, adjusted close and volume values. + +The combination of source, instrument, timestamp and timeframe identifies a unique stored price bar. + +## Internal Models and Storage + +ARGUS uses internal domain models before data is stored: + +```text +DataSource +Instrument +PriceBar +``` + +These models describe the meaning of the data inside ARGUS. + +The storage layer translates these internal models into DuckDB tables: + +```text +DataSource -> data_sources +Instrument -> instruments +PriceBar -> price_bars +``` + +In Python, a `PriceBar` references a `DataSource` and an `Instrument`. + +In DuckDB, this relationship is stored through IDs: + +```text +price_bars.source_id -> data_sources.id +price_bars.instrument_id -> instruments.id +``` + +This keeps the database normalized while still allowing ARGUS to work with meaningful internal models in Python. + +## Reading Stored Data + +Stored price bars can be read by: + +```text +source +instrument +start date +end date +``` + +The storage layer joins `price_bars`, `data_sources` and `instruments` so that stored IDs become readable market data again. + +Read operations return tabular data that can be used by: + +```text +analytics +charts +dashboards +reports +``` + +This allows ARGUS to process stored historical data without depending on raw API response structures. 
diff --git a/pyproject.toml b/pyproject.toml index e2dc784..a8ac7b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ dependencies = [ "numpy", "matplotlib", "yfinance", + "duckdb", ] [project.optional-dependencies] diff --git a/src/argus/clients/exchangerate_client.py b/src/argus/clients/exchangerate_client.py index 8ea5e89..d899718 100644 --- a/src/argus/clients/exchangerate_client.py +++ b/src/argus/clients/exchangerate_client.py @@ -24,6 +24,16 @@ def get_rates(curr1: str, curr2: str): resp.raise_for_status() payload = resp.json() + if payload["result"] == "success": + data["result"] = "success" + data["conversion_rate"] = payload["conversion_rate"] + return data + else: + data["result"] = "error" + data["error_type"] = payload.get("error_type") + check_error(data["error_type"]) + return None + except req.exceptions.Timeout: print("API hat zu lange gebraucht.") return None @@ -41,16 +51,6 @@ def get_rates(curr1: str, curr2: str): print("Unerwartete API-Antwortstruktur.") return None - if payload.get("result") == "success": - data["result"] = "success" - data["conversion_rate"] = payload.get("conversion_rate") - return data - else: - data["result"] = "error" - data["error_type"] = payload.get("error_type") - check_error(data["error_type"]) - return None - def check_error(err_type: str) -> None: """ diff --git a/src/argus/domain/internal_models.py b/src/argus/domain/internal_models.py new file mode 100644 index 0000000..3b7630e --- /dev/null +++ b/src/argus/domain/internal_models.py @@ -0,0 +1,34 @@ +from dataclasses import dataclass +from datetime import date + + +@dataclass +class DataSource: + name: str + provider_kind: str + requires_api_key: bool = False + + +@dataclass +class Instrument: + symbol: str + name: str + asset_class: str + currency: str | None = None + exchange: str | None = None + base_currency: str | None = None + quote_currency: str | None = None + + +@dataclass +class PriceBar: + source: DataSource + instrument: Instrument + 
timestamp: date + timeframe: str + close: float + open: float | None = None + high: float | None = None + low: float | None = None + adjusted_close: float | None = None + volume: float | None = None diff --git a/src/argus/storage/__init__.py b/src/argus/storage/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/argus/storage/database.py b/src/argus/storage/database.py new file mode 100644 index 0000000..ea7128c --- /dev/null +++ b/src/argus/storage/database.py @@ -0,0 +1,271 @@ +import duckdb +from datetime import date +import pandas as pd +from argus.domain.internal_models import DataSource, PriceBar, Instrument + + +def initialize_database(database_path: str) -> None: + """ + Initialize the DuckDB database schema. + + Creates the required sequences and tables for data sources, + instruments, and price bars. + + Args: + database_path (str): Path to the DuckDB database file. + + Returns: + None + """ + queries = [ + "CREATE SEQUENCE IF NOT EXISTS data_sources_id_seq;", + "CREATE SEQUENCE IF NOT EXISTS instruments_id_seq;", + "CREATE SEQUENCE IF NOT EXISTS price_bars_id_seq;", + """ + CREATE TABLE IF NOT EXISTS data_sources ( + id INTEGER PRIMARY KEY DEFAULT nextval('data_sources_id_seq'), + name TEXT NOT NULL UNIQUE, + provider_kind TEXT NOT NULL, + requires_api_key BOOLEAN NOT NULL + ); + """, + """ + CREATE TABLE IF NOT EXISTS instruments ( + id INTEGER PRIMARY KEY DEFAULT nextval('instruments_id_seq'), + symbol TEXT NOT NULL UNIQUE, + name TEXT NOT NULL, + asset_class TEXT NOT NULL, + currency TEXT, + exchange TEXT, + base_currency TEXT, + quote_currency TEXT + ); + """, + """ + CREATE TABLE IF NOT EXISTS price_bars ( + id INTEGER PRIMARY KEY DEFAULT nextval('price_bars_id_seq'), + source_id INTEGER NOT NULL, + instrument_id INTEGER NOT NULL, + timestamp DATE NOT NULL, + timeframe TEXT NOT NULL, + close DOUBLE NOT NULL, + open DOUBLE, + high DOUBLE, + low DOUBLE, + adjusted_close DOUBLE, + volume DOUBLE, + FOREIGN KEY (source_id) REFERENCES 
data_sources (id), + FOREIGN KEY (instrument_id) REFERENCES instruments (id), + UNIQUE (source_id, instrument_id, timestamp, timeframe) + ); + """, + ] + + connection = duckdb.connect(database_path) + try: + for query in queries: + connection.execute(query) + finally: + connection.close() + + +def get_or_create_source(connection, source: DataSource) -> int: + """ + Get an existing data source ID or create a new data source. + + Searches for a data source by name. If it already exists, its ID is + returned. Otherwise, the data source is inserted and the new ID is + returned. + + Args: + connection: Active DuckDB connection. + source (DataSource): Data source model containing provider metadata. + + Returns: + int: Database ID of the existing or newly created data source. + + Raises: + ValueError: If the data source could not be inserted or found. + """ + insert_query = """ + INSERT INTO data_sources (name, provider_kind, requires_api_key) + VALUES (?,?,?) + ON CONFLICT DO NOTHING; + """ + search_query = """ + SELECT id FROM data_sources + WHERE name=? + """ + + result = connection.execute( + query=search_query, + parameters=[source.name], + ).fetchone() + if result is not None: + return result[0] + + connection.execute( + query=insert_query, + parameters=[source.name, source.provider_kind, source.requires_api_key], + ) + + result = connection.execute( + query=search_query, + parameters=[source.name], + ).fetchone() + + if result is None: + raise ValueError("Data source could not be inserted.") + + return result[0] + + +def get_or_create_instrument(connection, instrument: Instrument) -> int: + """ + Get an existing instrument ID or create a new instrument. + + Searches for an instrument by symbol. If it already exists, its ID is + returned. Otherwise, the instrument is inserted and the new ID is + returned. + + Args: + connection: Active DuckDB connection. + instrument (Instrument): Instrument model containing symbol and + asset metadata. 
+ + Returns: + int: Database ID of the existing or newly created instrument. + + Raises: + ValueError: If the instrument could not be inserted or found. + """ + insert_query = """ + INSERT INTO instruments ( + symbol, + name, + asset_class, + currency, + exchange, + base_currency, + quote_currency) + VALUES (?,?,?,?,?,?,?) + ON CONFLICT DO NOTHING; + """ + search_query = """ + SELECT id FROM instruments + WHERE symbol=? + """ + + result = connection.execute( + query=search_query, + parameters=[instrument.symbol], + ).fetchone() + if result is not None: + return result[0] + + connection.execute( + query=insert_query, + parameters=[ + instrument.symbol, + instrument.name, + instrument.asset_class, + instrument.currency, + instrument.exchange, + instrument.base_currency, + instrument.quote_currency, + ], + ) + result = connection.execute( + query=search_query, + parameters=[instrument.symbol], + ).fetchone() + + if result is None: + raise ValueError("Instrument could not be inserted.") + + return result[0] + + +def insert_price_bar(db: str, price_bar: PriceBar) -> None: + insert_query = """ + INSERT INTO price_bars ( + source_id, + instrument_id, + timestamp, + timeframe, + close, + open, + high, + low, + adjusted_close, + volume + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT DO NOTHING; + """ + connection = duckdb.connect(db) + try: + source_id = get_or_create_source(connection, price_bar.source) + instrument_id = get_or_create_instrument(connection, price_bar.instrument) + connection.execute( + query=insert_query, + parameters=[ + source_id, + instrument_id, + price_bar.timestamp, + price_bar.timeframe, + price_bar.close, + price_bar.open, + price_bar.high, + price_bar.low, + price_bar.adjusted_close, + price_bar.volume, + ], + ) + finally: + connection.close() + + +def read_price_bars( + db: str, + source: DataSource, + instrument: Instrument, + start_date: date, + end_date: date, +) -> pd.DataFrame: + + search_query = """ + SELECT + data_sources.name AS source_name, + instruments.symbol AS instrument_symbol, + price_bars.timestamp, + price_bars.timeframe, + price_bars.open, + price_bars.high, + price_bars.low, + price_bars.close, + price_bars.adjusted_close, + price_bars.volume + FROM price_bars + JOIN data_sources ON price_bars.source_id = data_sources.id + JOIN instruments ON price_bars.instrument_id = instruments.id + WHERE data_sources.name = ? + AND instruments.symbol = ? + AND price_bars.timestamp BETWEEN ? AND ? 
+ ORDER BY price_bars.timestamp; + """ + + connection = duckdb.connect(db) + try: + result = connection.execute( + query=search_query, + parameters=[ + source.name, + instrument.symbol, + start_date, + end_date, + ], + ).df() + finally: + connection.close() + return result diff --git a/tests/test_exchangerate_client.py b/tests/test_exchangerate_client.py index faf0864..132a8a8 100644 --- a/tests/test_exchangerate_client.py +++ b/tests/test_exchangerate_client.py @@ -3,7 +3,7 @@ from argus.clients.exchangerate_client import get_rates, check_error -def test_check_currency_timeout(monkeypatch): +def test_check_currency_timeout(monkeypatch, capsys): def test_get_resp(url, timeout): raise req.exceptions.Timeout() @@ -12,8 +12,11 @@ def test_get_resp(url, timeout): data = get_rates("EUR", "USD") assert data is None + captured = capsys.readouterr() + assert "API hat zu lange gebraucht." in captured.out + -def test_check_currency_connection_error(monkeypatch): +def test_check_currency_connection_error(monkeypatch, capsys): def test_get_resp(url, timeout): raise req.exceptions.ConnectionError() @@ -22,8 +25,11 @@ def test_get_resp(url, timeout): data = get_rates("EUR", "USD") assert data is None + captured = capsys.readouterr() + assert "Keine Verbindung zur API." 
in captured.out + -def test_check_currency_request_exception(monkeypatch): +def test_check_currency_request_exception(monkeypatch, capsys): def test_get_resp(url, timeout): raise req.exceptions.RequestException("Testfehler") @@ -32,8 +38,11 @@ def test_get_resp(url, timeout): data = get_rates("EUR", "USD") assert data is None + captured = capsys.readouterr() + assert "Request fehlgeschlagen:" in captured.out + -def test_check_currency_value_error(monkeypatch): +def test_check_currency_value_error(monkeypatch, capsys): test_resp = Mock() test_resp.raise_for_status.return_value = None test_resp.json.side_effect = ValueError("Ungültige JSON-Antwort") @@ -46,14 +55,16 @@ def test_get_resp(url, timeout): data = get_rates("EUR", "USD") assert data is None + captured = capsys.readouterr() + assert "Fehler beim Verarbeiten der API-Antwort." in captured.out + -def test_check_currency_key_error(monkeypatch): +def test_check_currency_key_error(monkeypatch, capsys): test_resp = Mock() test_resp.raise_for_status.return_value = None test_resp.json.return_value = { - "result": "", + "result": "success", # not passing "success" bypasses the "conversion_rate" checking "error_type": "", - # "conversion_rate" fehlt absichtlich } def test_get_resp(url, timeout): @@ -64,6 +75,9 @@ def test_get_resp(url, timeout): data = get_rates("EUR", "USD") assert data is None + captured = capsys.readouterr() + assert "Unerwartete API-Antwortstruktur."
in captured.out + def test_check_currency_valid(monkeypatch): test_resp = Mock() @@ -83,7 +97,7 @@ def test_get_resp(url, timeout): assert data == {"result": "success", "error_type": "", "conversion_rate": 1.2} -def test_check_currency_invalid(monkeypatch): +def test_check_currency_invalid(monkeypatch, capsys): test_resp = Mock() test_resp.raise_for_status.return_value = None test_resp.json.return_value = { @@ -100,6 +114,9 @@ def test_get_resp(url, timeout): data = get_rates("EUR", "USD") assert data is None + captured = capsys.readouterr() + assert "Invalid request! Please try again later." in captured.out + def test_check_error(capsys): check_error("unsupported-code") @@ -123,3 +140,7 @@ def test_check_error(capsys): captured.out == "Request limit reached! Please try again later or upgrade to exchangerate-api.com.\n" ) + + check_error("Some unknown Error") + captured = capsys.readouterr() + assert captured.out == "" diff --git a/tests/test_internal_models.py b/tests/test_internal_models.py new file mode 100644 index 0000000..97df4c6 --- /dev/null +++ b/tests/test_internal_models.py @@ -0,0 +1,101 @@ +from argus.domain.internal_models import DataSource, Instrument, PriceBar +from datetime import date + + +def test_data_source_can_be_created() -> None: + source = DataSource( + name="yfinance", + provider_kind="fx_rates", + ) + + assert source.name == "yfinance" + assert source.provider_kind == "fx_rates" + assert source.requires_api_key is False + + +def test_instrument_can_be_created() -> None: + instrument = Instrument( + symbol="EUR/USD", + name="Euro / US Dollar", + asset_class="fx", + base_currency="EUR", + quote_currency="USD", + ) + + assert instrument.symbol == "EUR/USD" + assert instrument.name == "Euro / US Dollar" + assert instrument.asset_class == "fx" + assert instrument.base_currency == "EUR" + assert instrument.quote_currency == "USD" + assert instrument.currency is None + assert instrument.exchange is None + + +def test_rate_bar_can_be_created() -> 
None: + source = DataSource( + name="yfinance", + provider_kind="fx_rates", + ) + + instrument_rate = Instrument( + symbol="EUR/USD", + name="Euro / US Dollar", + asset_class="fx", + base_currency="EUR", + quote_currency="USD", + ) + + price_bar = PriceBar( + source=source, + instrument=instrument_rate, + timestamp=date(2026, 1, 1), + timeframe="1d", + close=1.89, + ) + + assert price_bar.source == source + assert price_bar.instrument == instrument_rate + assert price_bar.timestamp == date(2026, 1, 1) + assert price_bar.timeframe == "1d" + assert price_bar.close == 1.89 + assert price_bar.open is None + assert price_bar.high is None + assert price_bar.low is None + assert price_bar.adjusted_close is None + assert price_bar.volume is None + + +def test_stock_ohlcv_bar_can_be_created() -> None: + source = DataSource( + name="yfinance", + provider_kind="market_prices", + ) + + instrument = Instrument( + symbol="AAPL", + name="Apple Inc.", + asset_class="stock", + currency="USD", + exchange="NASDAQ", + ) + + price_bar = PriceBar( + source=source, + instrument=instrument, + timestamp=date(2026, 1, 1), + timeframe="1d", + open=187.15, + high=188.44, + low=183.89, + close=185.64, + adjusted_close=184.25, + volume=50_200_000, + ) + + assert price_bar.instrument.symbol == "AAPL" + assert price_bar.open == 187.15 + assert price_bar.high == 188.44 + assert price_bar.low == 183.89 + assert price_bar.close == 185.64 + assert price_bar.adjusted_close == 184.25 + assert price_bar.volume == 50_200_000 diff --git a/tests/test_storage_database.py b/tests/test_storage_database.py new file mode 100644 index 0000000..d513008 --- /dev/null +++ b/tests/test_storage_database.py @@ -0,0 +1,222 @@ +from datetime import date + +import duckdb + +from argus.domain.internal_models import DataSource, Instrument, PriceBar +from argus.storage.database import ( + initialize_database, + insert_price_bar, + read_price_bars, +) + + +def test_initialize_database_creates_required_tables(tmp_path): + db 
= tmp_path / "test.duckdb" + + initialize_database(db) + connection = duckdb.connect(db) + tables = connection.execute("SHOW TABLES;").fetchall() + connection.close() + table_names = {row[0] for row in tables} + + assert "data_sources" in table_names + assert "instruments" in table_names + assert "price_bars" in table_names + + +def test_data_is_inserted(tmp_path): + source = DataSource( + name="Yahoo", provider_kind="yfinance_api", requires_api_key=False + ) + + instrument = Instrument( + symbol="EUR/USD", + name="EUR - USD Rate", + asset_class="fx", + base_currency="EUR", + quote_currency="USD", + ) + + pricebar = PriceBar( + source=source, + instrument=instrument, + timestamp=date(2026, 1, 1), + timeframe="1d", + close=1.89, + ) + + db = tmp_path / "test.duckdb" + initialize_database(db) + insert_price_bar(db, pricebar) + connection = duckdb.connect(db) + + instrument_count = connection.execute( + "SELECT COUNT(*) FROM instruments;" + ).fetchone() + + source_count = connection.execute("SELECT COUNT(*) FROM data_sources;").fetchone() + + price_bar_count = connection.execute("SELECT COUNT(*) FROM price_bars;").fetchone() + + assert instrument_count is not None + assert source_count is not None + assert price_bar_count is not None + assert instrument_count[0] == 1 + assert source_count[0] == 1 + assert price_bar_count[0] == 1 + + +def test_fx_has_correct_format(tmp_path): + source = DataSource( + name="Yahoo", provider_kind="yfinance_api", requires_api_key=False + ) + + instrument = Instrument( + symbol="EUR/USD", + name="EUR - USD Rate", + asset_class="fx", + base_currency="EUR", + quote_currency="USD", + ) + + pricebar = PriceBar( + source=source, + instrument=instrument, + timestamp=date(2026, 1, 1), + timeframe="1d", + close=1.89, + ) + + db = tmp_path / "test.duckdb" + initialize_database(db) + insert_price_bar(db, pricebar) + connection = duckdb.connect(db) + + price_bar_fx = connection.execute("SELECT * FROM price_bars;").fetchone() + connection.close() + + 
assert price_bar_fx is not None + assert price_bar_fx[0] == 1 + assert price_bar_fx[1] == 1 + assert price_bar_fx[2] == 1 + assert price_bar_fx[3] == date(2026, 1, 1) + assert price_bar_fx[4] == "1d" + assert price_bar_fx[5] == 1.89 + assert price_bar_fx[6] is None + assert price_bar_fx[7] is None + assert price_bar_fx[8] is None + assert price_bar_fx[9] is None + assert price_bar_fx[10] is None + + +def test_duplicates_are_ignored(tmp_path): + source = DataSource( + name="Yahoo", provider_kind="yfinance_api", requires_api_key=False + ) + + instrument = Instrument( + symbol="EUR/USD", + name="EUR - USD Rate", + asset_class="fx", + base_currency="EUR", + quote_currency="USD", + ) + + pricebar = PriceBar( + source=source, + instrument=instrument, + timestamp=date(2026, 1, 1), + timeframe="1d", + close=1.89, + ) + + db = tmp_path / "test.duckdb" + initialize_database(db) + insert_price_bar(db, pricebar) + insert_price_bar(db, pricebar) + connection = duckdb.connect(db) + count = connection.execute("SELECT COUNT(*) FROM price_bars;").fetchone() + + assert count is not None + assert count[0] == 1 + + +def test_read_price_bars_returns_matching_data(tmp_path): + source = DataSource( + name="Yahoo", + provider_kind="yfinance_api", + requires_api_key=False, + ) + + instrument = Instrument( + symbol="EUR/USD", + name="EUR - USD Rate", + asset_class="fx", + base_currency="EUR", + quote_currency="USD", + ) + + pricebar = PriceBar( + source=source, + instrument=instrument, + timestamp=date(2026, 1, 1), + timeframe="1d", + close=1.89, + ) + + db = tmp_path / "test.duckdb" + initialize_database(db) + insert_price_bar(db, pricebar) + + result = read_price_bars( + db=db, + source=source, + instrument=instrument, + start_date=date(2026, 1, 1), + end_date=date(2026, 1, 31), + ) + + assert result.empty is False + assert len(result) == 1 + assert result.iloc[0]["source_name"] == "Yahoo" + assert result.iloc[0]["instrument_symbol"] == "EUR/USD" + assert result.iloc[0]["timeframe"] == 
"1d" + assert result.iloc[0]["close"] == 1.89 + + +def test_read_price_bars_returns_empty_dataframe_for_missing_range(tmp_path): + source = DataSource( + name="Yahoo", + provider_kind="yfinance_api", + requires_api_key=False, + ) + + instrument = Instrument( + symbol="EUR/USD", + name="EUR - USD Rate", + asset_class="fx", + base_currency="EUR", + quote_currency="USD", + ) + + pricebar = PriceBar( + source=source, + instrument=instrument, + timestamp=date(2026, 1, 1), + timeframe="1d", + close=1.89, + ) + + db = tmp_path / "test.duckdb" + initialize_database(db) + insert_price_bar(db, pricebar) + + result = read_price_bars( + db=db, + source=source, + instrument=instrument, + start_date=date(2027, 1, 1), + end_date=date(2027, 1, 31), + ) + + assert result.empty is True diff --git a/tests/test_timeseries_service.py b/tests/test_timeseries_service.py index 7dd3c9f..cd5c97a 100644 --- a/tests/test_timeseries_service.py +++ b/tests/test_timeseries_service.py @@ -23,8 +23,9 @@ def test_get_a_full_timeseries(): "max_rate": [1.1055831909179688], } result = prepare_trend_analysis(test_curr, test_start, test_end, test_interval) - if result is None: - return False + + assert result is not None + result_df, result_dict = result result_df["date"] = result_df["date"].astype("str") result_dict["min_date"] = [str(result_dict["min_date"][0])] diff --git a/tests/test_validation_domain.py b/tests/test_validation_domain.py index a5bd41f..0166741 100644 --- a/tests/test_validation_domain.py +++ b/tests/test_validation_domain.py @@ -7,9 +7,13 @@ def test_op_is_valid(): - data = is_valid_op("+") - assert data is True + assert is_valid_op("+") is True + assert is_valid_op("-") is True + assert is_valid_op("*") is True + assert is_valid_op("/") is True + assert is_valid_op("%") is True + assert is_valid_op("**") is True def test_op_is_not_valid(): diff --git a/tests/test_yfinance_client.py b/tests/test_yfinance_client.py index faf15fc..6201b19 100644 --- a/tests/test_yfinance_client.py +++ 
b/tests/test_yfinance_client.py @@ -72,7 +72,7 @@ def test_error_raise(monkeypatch): def fake_yfinance_download( tickers=test_curr, start=test_start, end=test_end, interval=test_interval ): - return Exception("fake yfinance error") + raise Exception("fake yfinance error") monkeypatch.setattr("yfinance.download", fake_yfinance_download)