
Commit 529aa97

Merge pull request #1 from quantarmyz/claude/fix-bugs-improve-usability-V0PCx
Fix bugs, update dependencies, and improve usability
2 parents c8e5c88 + 2d20250 commit 529aa97

12 files changed: 290 additions & 235 deletions

.gitignore

Lines changed: 20 additions & 0 deletions
@@ -1 +1,21 @@
+# Credentials
 DockerSource/.env
+*.env
+
+# Jupyter
+.ipynb_checkpoints/
+*/.ipynb_checkpoints/
+**/.ipynb_checkpoints/
+
+# Python
+__pycache__/
+*.py[cod]
+*.pyo
+
+# Data files
+*.h5
+*.hdf5
+
+# OS
+.DS_Store
+Thumbs.db

DockerSource/Dockerfile

Lines changed: 23 additions & 10 deletions
@@ -21,21 +21,34 @@ FROM continuumio/miniconda3:23.3.1-0
 
 RUN apt-get update && \
     apt-get upgrade -y && \
-    apt-get install -y --no-install-recommends git nodejs gcc g++ exa nano && \
+    apt-get install -y --no-install-recommends git nodejs gcc g++ nano && \
+    apt-get clean && rm -rf /var/lib/apt/lists/* && \
     chmod 777 /opt
 
 RUN mkdir /WorkingData
 WORKDIR /WorkingData
 
-COPY --chown=${NB_UID}:${NB_GID} req.txt /tmp/
-COPY --chown=${NB_UID}:${NB_GID} overrides.json /opt/conda/share/jupyter/lab/settings/overrides.json
+COPY req.txt /tmp/
+COPY overrides.json /opt/conda/share/jupyter/lab/settings/overrides.json
 
-RUN conda install -c conda-forge jupyterlab ta-lib nodejs jupyterlab-night
-RUN pip install --no-cache-dir --requirement /tmp/req.txt
+RUN conda install -c conda-forge jupyterlab ta-lib nodejs jupyterlab-night && \
+    conda clean -afy
+RUN pip install --no-cache-dir --requirement /tmp/req.txt
+
+RUN mkdir -p /opt/conda/etc/jupyter && \
+    echo "c.Completer.use_jedi = False" >> /opt/conda/etc/jupyter/jupyter_notebook_config.py && \
+    echo "c.ResourceUseDisplay.track_cpu_percent = True" >> /opt/conda/etc/jupyter/jupyter_notebook_config.py
+
+SHELL ["/bin/bash", "-c"]
+RUN PYTHON_VERSION=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')") && \
+    BUNDLES_DIR="/opt/conda/lib/python${PYTHON_VERSION}/site-packages/zipline/data/bundles" && \
+    mkdir -p /root/.zipline && \
+    echo "Zipline bundles dir: ${BUNDLES_DIR}"
+
+COPY extension.py /root/.zipline/extension.py
+COPY qa_datalake.py /tmp/qa_datalake.py
+RUN PYTHON_VERSION=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')") && \
+    cp /tmp/qa_datalake.py "/opt/conda/lib/python${PYTHON_VERSION}/site-packages/zipline/data/bundles/qa_datalake.py"
+COPY .env /root/.env
 
-RUN echo "c.Completer.use_jedi = False" >> /opt/conda/etc/jupyter/jupyter_notebook_config.py
-RUN echo "c.ResourceUseDisplay.track_cpu_percent = True" >> /opt/conda/etc/jupyter/jupyter_notebook_config.py
-COPY --chown=${NB_UID}:${NB_GID} extension.py /root/.zipline/extension.py
-COPY --chown=${NB_UID}:${NB_GID} qa_datalake.py /opt/conda/lib/python3.10/site-packages/zipline/data/bundles/qa_datalake.py
-COPY --chown=${NB_UID}:${NB_GID} .env /root/.env
 CMD ["jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root"]

DockerSource/qa_datalake.py

Lines changed: 53 additions & 49 deletions
@@ -19,22 +19,24 @@
 #################################
 
 import pandas as pd
-from os import listdir, getenv
+from os import getenv
 from exchange_calendars import get_calendar
 from dotenv import load_dotenv
 from arcticdb import Arctic
 import warnings
 
 warnings.filterwarnings('ignore')
-dotenv_path = '/root/.env'
-load_dotenv(dotenv_path)
 
-endpoint = getenv("ENDPOINT")
-db = getenv("DB")
-access_key = getenv("ACCESS_KEY")
-secret_key = getenv("SECRET_KEY")
-
-ac = Arctic(f's3s://{endpoint}:{db}?access={access_key}&secret={secret_key}')
+def _get_arctic_connection():
+    dotenv_path = '/root/.env'
+    load_dotenv(dotenv_path)
+    endpoint = getenv("ENDPOINT")
+    db = getenv("DB")
+    access_key = getenv("ACCESS_KEY")
+    secret_key = getenv("SECRET_KEY")
+    if not all([endpoint, db, access_key, secret_key]):
+        raise ValueError("Missing environment variables. Check your .env file (ENDPOINT, DB, ACCESS_KEY, SECRET_KEY)")
+    return Arctic(f's3s://{endpoint}:{db}?access={access_key}&secret={secret_key}')
 
 
 def bse_data(environ,
@@ -51,73 +53,75 @@ def bse_data(environ,
 
     symbols = ['XLE.US','XLF.US','XLI.US','XLK.US','XLP.US','XLU.US','XLV.US','XLY.US','XLB.US','XLC.US','ITA.US']
     if not symbols:
-
         raise ValueError("No TICKERS found in the QA DATALAKE")
-
-    divs_splits = {"divs": pd.DataFrame(columns=["sid","amount","ex_date","record_date","declared_date","pay_date",]),"splits": pd.DataFrame(columns=["sid", "ratio", "effective_date"]),}
-
+
+    divs_splits = {
+        "divs": pd.DataFrame(columns=["sid","amount","ex_date","record_date","declared_date","pay_date"]),
+        "splits": pd.DataFrame(columns=["sid", "ratio", "effective_date"]),
+    }
+
     metadata = pd.DataFrame(columns=('start_date','end_date','auto_close_date','symbol','exchange'))
     sessions = calendar.sessions_in_range(start_session, end_session)
 
-    daily_bar_writer.write(process_stocks(symbols, sessions, metadata, divs_splits))
+    ac = _get_arctic_connection()
+    daily_bar_writer.write(process_stocks(ac, symbols, sessions, metadata, divs_splits))
 
     metadata["exchange"] = "QAX"
     exchange = {'exchange': 'QAX', 'canonical_name': 'QUANTARMY BACKTEST', 'country_code': 'US'}
-    exchange_df = pd.DataFrame(exchange, index = [0])
-
-
+    exchange_df = pd.DataFrame(exchange, index=[0])
 
     divs_splits["divs"]["sid"] = divs_splits["divs"]["sid"].astype(int)
     divs_splits["splits"]["sid"] = divs_splits["splits"]["sid"].astype(int)
-    daily_bar_writer.write(process_stocks(symbols, sessions, metadata, divs_splits))
     asset_db_writer.write(equities=metadata, exchanges=exchange_df)
     adjustment_writer.write(splits=divs_splits["splits"], dividends=divs_splits["divs"])
 
-def process_stocks(symbols, sessions, metadata, divs_splits):
+def process_stocks(ac, symbols, sessions, metadata, divs_splits):
     my_cal = get_calendar('NYSE')
     prices = ac.get_library('prices.etfs.us.stable')
+    divs_lib = ac.get_library('divs.etfs.us.stable')
+    splits_lib = ac.get_library('splits.etfs.us.stable')
+
     for sid, symbol in enumerate(symbols):
-        print('[QA DATALAKE CARNIVORE ] ||| Loading {}...'.format(symbol))
+        print('[QA DATALAKE] Loading {}...'.format(symbol))
         df = prices.read(symbol).data
         df = df['2010':]
         start_date = df.index[0]
-        end_date = df.index[-1]
-        sessions = my_cal.sessions_in_range(start_date, end_date)
-        df = df[df.index.isin(sessions)]
-        df = df.reindex(sessions.tz_localize(None))[start_date:end_date] #tz_localize(None)
-        df.fillna(method='ffill', inplace=True)
-        df.dropna(inplace=True)
+        end_date = df.index[-1]
+        sym_sessions = my_cal.sessions_in_range(start_date, end_date)
+        df = df[df.index.isin(sym_sessions)]
+        df = df.reindex(sym_sessions.tz_localize(None))[start_date:end_date]
+        df = df.ffill()
+        df.dropna(inplace=True)
         ac_date = end_date + pd.Timedelta(days=1)
         metadata.loc[sid] = start_date, end_date, ac_date, symbol, 'QAX'
 
-        if ac.get_library('divs.etfs.us.stable').has_symbol(symbol):
-            data_divs = ac['divs.etfs.us.stable'].read(symbol).data
-            data_divs = data_divs.reset_index()
-            div = pd.DataFrame()
-            div['ex_date'] = data_divs['date']
-            div['record_date'] = data_divs['recordDate']
-            div['declared_date'] = data_divs['declarationDate']
-            div['pay_date'] = data_divs['paymentDate']
-            div['amount'] = data_divs['value']
-            div['sid'] = sid
-
+        if divs_lib.has_symbol(symbol):
+            data_divs = divs_lib.read(symbol).data.reset_index()
+            div = pd.DataFrame({
+                'ex_date': data_divs['date'],
+                'record_date': data_divs['recordDate'],
+                'declared_date': data_divs['declarationDate'],
+                'pay_date': data_divs['paymentDate'],
+                'amount': data_divs['value'],
+                'sid': sid,
+            })
            divs = divs_splits['divs']
            ind = pd.Index(range(divs.shape[0], divs.shape[0] + div.shape[0]))
            div.set_index(ind, inplace=True)
            divs_splits["divs"] = pd.concat([divs, div], axis=0)
-            print('[QA DATALAKE CARNIVORE] DIVS INFO ADDED OVER',symbol)
-
-        if ac.get_library('splits.etfs.us.stable').has_symbol(symbol):
-            data_splits = ac['splits.etfs.us.stable'].read(symbol).data
-            data_splits = data_splits.reset_index()
-            split = pd.DataFrame()
-            split['effective_date'] = data_splits['date']
-            split['ratio'] = data_splits['split']
-            split['sid'] = sid
+            print('[QA DATALAKE] DIVS loaded for', symbol)
+
+        if splits_lib.has_symbol(symbol):
+            data_splits = splits_lib.read(symbol).data.reset_index()
+            split = pd.DataFrame({
+                'effective_date': data_splits['date'],
+                'ratio': data_splits['split'],
+                'sid': sid,
+            })
            splits = divs_splits["splits"]
-            index = pd.Index(range(splits.shape[0], splits.shape[0] + split.shape[0]))
+            index = pd.Index(range(splits.shape[0], splits.shape[0] + split.shape[0]))
            split.set_index(index, inplace=True)
            divs_splits["splits"] = pd.concat([splits, split], axis=0)
-            print('[QA DATALAKE CARNIVORE] SPLIT INFO ADDED OVER',symbol)
-
+            print('[QA DATALAKE] SPLITS loaded for', symbol)
+
        yield sid, df
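
bse_data is a zipline-reloaded bundle function, so it only runs once it is registered and ingested. A minimal sketch of the companion /root/.zipline/extension.py, which the Dockerfile copies into the image but which is not shown in this diff; the bundle name qa_datalake and the NYSE calendar are assumptions:

# Hypothetical sketch of /root/.zipline/extension.py: register the custom
# bundle so `zipline ingest -b qa_datalake` can find bse_data. The bundle
# name and calendar are assumptions; the real extension.py is not in this diff.
from zipline.data.bundles import register
from zipline.data.bundles.qa_datalake import bse_data

register('qa_datalake', bse_data, calendar_name='NYSE')

With the refactor above, the Arctic connection is opened lazily inside bse_data via _get_arctic_connection(), so a missing .env fails with a clear error only when ingestion actually runs, not whenever the module is imported.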

DockerSource/req.txt

Lines changed: 15 additions & 4 deletions
@@ -1,23 +1,34 @@
+# JupyterLab Extensions
 jupyterlab_materialdarker
-jupyterlab-git
+jupyterlab-git
 jupyterlab-fonts
 jupyterlab-lsp
 python-lsp-server[all]
 jupyterlab-system-monitor
 jupyterlab_execute_time
+
+# Data Analysis
 pandas
 numpy
+scipy
+scikit-learn
 matplotlib
 plotly
 seaborn
-yfinance
 mplcyberpunk
+pyarrow
+
+# Market Data & Storage
+yfinance
 arcticdb
+
+# Backtesting
 zipline-reloaded
 pyfolio-reloaded
 alphalens-reloaded
 empyrical-reloaded
 exchange_calendars
+
+# Utilities
 python-dotenv
-logbook
-pyarrow
+logbook

docker-compose.yml

Lines changed: 4 additions & 4 deletions
@@ -13,11 +13,10 @@
 ### QUANTARMY.COM - PYTHONPARATRADING.COM
 #################################
 
-# Dockerfile.
+# docker-compose.yml
 # - Launches the notebook as a service on localhost:8888
-# - Testing password: testing
+# - Default password: testing
 
-version: '3'
 services:
   notebook:
     build:
@@ -28,4 +27,5 @@ services:
     environment:
       JUPYTER_TOKEN: "testing"
     volumes:
-      - ./quantarmy_repo:/WorkingData
+      - ./quantarmy_repo:/WorkingData
+    restart: unless-stopped
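
With the fixed token and restart: unless-stopped, the running service can be smoke-tested from the host. A small sketch assuming the compose defaults above (port 8888, token "testing") and the standard Jupyter Server REST API:

# Sketch: check that the JupyterLab container is up, authenticating with
# the token set in docker-compose.yml. Port and token are the compose
# defaults; adjust if you remap them.
import json
import urllib.request

url = "http://localhost:8888/api/status?token=testing"
with urllib.request.urlopen(url, timeout=5) as resp:
    print(json.load(resp))  # server status JSON, e.g. started/last_activity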

quantarmy_repo/backtest/01-bt-zipline-variance.ipynb

Lines changed: 7 additions & 17 deletions
@@ -1,5 +1,11 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "hkyb1iz1bfj",
+   "source": "# Backtest: Annualized Volatility Strategy\nStrategy based on annualized volatility. Selects the N assets with the lowest volatility for long positions.",
+   "metadata": {}
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
@@ -655,22 +661,6 @@
    "source": [
     "positions.index"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "636793c4-645c-4ed3-9e67-184633e8c3c0",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6a6b7743-6114-4ba5-a9f7-5ef672d2f54c",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
 "metadata": {
@@ -694,4 +684,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}
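
The new markdown cell documents the strategy: rank assets by annualized volatility and go long the N least volatile. A minimal sketch of that ranking step, assuming a DataFrame of daily closes with one column per ticker (the notebook's actual variable names are not shown in this diff):

# Sketch: select the N lowest annualized-volatility assets from daily closes.
# `prices` and `n` are illustrative names, not the notebook's own.
import numpy as np
import pandas as pd

def lowest_vol_assets(prices: pd.DataFrame, n: int = 3) -> list:
    daily_returns = prices.pct_change().dropna()
    annualized_vol = daily_returns.std() * np.sqrt(252)  # 252 trading days
    return annualized_vol.nsmallest(n).index.tolist()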
