forked from OpenMS/streamlit-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
295 lines (257 loc) · 14.2 KB
/
Dockerfile
File metadata and controls
295 lines (257 loc) · 14.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# This Dockerfile builds OpenMS, the TOPP tools, pyOpenMS and thirdparty tools.
# It also adds a basic streamlit server that serves a pyOpenMS-based app.
# hints:
# build image and give it a name (here: streamlitapp) with: docker build --no-cache -t streamlitapp:latest --build-arg GITHUB_TOKEN=<your-github-token> . 2>&1 | tee build.log
# check if image was built: docker image ls
# run container: docker run -p 8501:8501 streamlitapp:latest
# debug container after build (comment out ENTRYPOINT) and run container with interactive /bin/bash shell
# prune unused images/etc. to free disc space (e.g. might be needed on gitpod). Use with care.: docker system prune --all --force
FROM ubuntu:24.04 AS setup-build-system
# OpenMS sources to build from (fork/branch overridable at build time).
# ARG OPENMS_REPO=https://github.com/OpenMS/OpenMS.git
# ARG OPENMS_BRANCH=release/3.5.0
ARG OPENMS_REPO=https://github.com/singjc/OpenMS.git
ARG OPENMS_BRANCH=for/opendiakiosk
ARG PORT=8501
# Streamlit app GitHub user name (to download artifact from).
ARG GITHUB_USER=OpenMS
# Streamlit app GitHub repository name (to download artifact from).
ARG GITHUB_REPO=streamlit-template
USER root
# Install required Ubuntu packages in a single layer so the package index used by
# `install` is always the one fetched by the preceding `update` (a standalone
# `apt-get update` layer goes stale when later layers are rebuilt alone), and
# remove the apt lists afterwards so they do not bloat the image. Every later
# install step in this file runs its own `apt-get update` first, so cleaning here is safe.
# NOTE(review): openjdk-8-jdk is not in the default Ubuntu 24.04 (noble) archive —
# confirm this package resolves (or move to a supported JDK) before relying on it.
RUN apt-get -y update \
    && apt-get install -y --no-install-recommends --no-install-suggests \
        # build toolchain, fetch tools and utilities
        autoconf \
        automake \
        ca-certificates \
        cron \
        curl \
        g++ \
        git \
        gpg \
        jq \
        libgtk2.0-dev \
        libtool \
        make \
        openjdk-8-jdk \
        patch \
        wget \
        # OpenMS native library dependencies
        coinor-libcbc-dev \
        libbz2-dev \
        libeigen3-dev \
        libglpk-dev \
        libhdf5-dev \
        libomp-dev \
        libsvm-dev \
        libxerces-c-dev \
        libzip-dev \
        patchelf \
        zlib1g-dev \
        # Boost components used by OpenMS (noble ships Boost 1.83)
        libboost-date-time1.83-dev \
        libboost-iostreams1.83-dev \
        libboost-math1.83-dev \
        libboost-random1.83-dev \
        libboost-regex1.83-dev \
        # Qt6 / OpenGL stack
        libgl-dev \
        libqt6opengl6-dev \
        libqt6openglwidgets6 \
        libqt6svg6-dev \
        qt6-base-dev \
    && update-ca-certificates \
    && rm -rf /var/lib/apt/lists/*
# Install Apache Arrow / Parquet development packages from the Arrow apt source,
# pinned to the 23.x series so the OpenMS build links against a known ABI.
# NOTE(review): the apt-source .deb is fetched from repo1.maven.org — confirm this
# mirror is intended rather than the canonical Arrow download host.
RUN set -eux; \
apt-get update; \
wget -qO /tmp/apache-arrow-apt-source-latest-noble.deb \
https://repo1.maven.org/maven2/org/apache/arrow/ubuntu/apache-arrow-apt-source-latest-noble.deb; \
apt-get install -y --no-install-recommends /tmp/apache-arrow-apt-source-latest-noble.deb; \
apt-get update; \
# Pin Arrow 23: find a libparquet-dev candidate that starts with 23.
ARROW_VER=$(apt-cache madison libparquet-dev | awk '{print $3}' | grep -E '^23\.' | head -n1) || true; \
# Fail the build loudly if no 23.x candidate exists; dump the candidates for debugging.
if [ -z "$ARROW_VER" ]; then \
echo "ERROR: no libparquet-dev 23.* available from apt source"; \
apt-cache madison libparquet-dev || true; \
exit 1; \
fi; \
# Install matching parquet+arrow dev packages, then clean up the installer .deb and apt lists.
apt-get install -y --no-install-recommends libparquet-dev="$ARROW_VER" libarrow-dev="$ARROW_VER"; \
rm -f /tmp/apache-arrow-apt-source-latest-noble.deb; \
rm -rf /var/lib/apt/lists/*
# Install the GitHub CLI (used in the final stage to download the release asset).
# Keyring + apt source are set up per the official gh installation instructions.
RUN (type -p wget >/dev/null || (apt-get update && apt-get install wget -y)) \
    && mkdir -p -m 755 /etc/apt/keyrings \
    && wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
    && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
    && apt-get update \
    # --no-install-recommends and list cleanup keep this layer minimal
    && apt-get install -y --no-install-recommends gh \
    && rm -rf /var/lib/apt/lists/*
# Download and install miniforge (conda/mamba distribution), non-interactively into /root/miniforge3.
ENV PATH="/root/miniforge3/bin:${PATH}"
RUN wget -q \
    https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh \
    && bash Miniforge3-Linux-x86_64.sh -b \
    && rm -f Miniforge3-Linux-x86_64.sh
# Sanity check: fail fast if mamba is not on PATH.
RUN mamba --version
# Set up the Python environment used for the pyOpenMS build and the Streamlit app.
RUN mamba create -n streamlit-env python=3.11
RUN echo "mamba activate streamlit-env" >> ~/.bashrc
# Run all subsequent RUN instructions inside the streamlit-env environment.
# (Removed the previous `SHELL ["/bin/bash", "--rcfile", "~/.bashrc"]`: it was
# immediately overridden by this SHELL, and `~` is not expanded in exec form anyway.)
SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"]
# Install up-to-date cmake via mamba and packages for the pyOpenMS build.
RUN mamba install cmake
# NOTE(review): `nose` is unmaintained on Python 3 — confirm it is still needed by the pyOpenMS build.
RUN pip install --upgrade pip && python -m pip install -U setuptools nose cython "autowrap<=0.24" pandas numpy pytest
# Clone the OpenMS branch and the associated contrib+thirdparty+pyOpenMS-doc submodules.
# (Dropped the trailing `&& cd /OpenMS`: a `cd` does not persist past the RUN layer;
# the WORKDIR below is what actually sets the working directory.)
RUN git clone --recursive --depth=1 -b ${OPENMS_BRANCH} --single-branch ${OPENMS_REPO}
# Pull Linux-compatible third-party dependencies and store them in directory /thirdparty.
WORKDIR /OpenMS
RUN mkdir /thirdparty && \
    git submodule update --init THIRDPARTY && \
    cp -r THIRDPARTY/All/* /thirdparty && \
    cp -r THIRDPARTY/Linux/x86_64/* /thirdparty && \
    chmod -R +x /thirdparty
# Make the bundled third-party executables discoverable on PATH.
ENV PATH="/thirdparty/LuciPHOr2:/thirdparty/MSGFPlus:/thirdparty/Sirius:/thirdparty/ThermoRawFileParser:/thirdparty/Comet:/thirdparty/Fido:/thirdparty/MaRaCluster:/thirdparty/MyriMatch:/thirdparty/OMSSA:/thirdparty/Percolator:/thirdparty/SpectraST:/thirdparty/XTandem:/thirdparty/crux:${PATH}"
# Build OpenMS and pyOpenMS.
FROM setup-build-system AS compile-openms
WORKDIR /
# Set up build directory.
RUN mkdir /openms-build
WORKDIR /openms-build
# Configure.
# - PYOPENMS=ON + PYOPENMS_PREPARE_WHEEL_REPAIR=ON: build Python bindings packaged so auditwheel can repair the wheel later.
# - Python_EXECUTABLE points at the mamba streamlit-env interpreter created above.
# - NOTE(review): -DPY_MEMLEAK_DISABLE=On appears after the source dir; cmake accepts
#   -D options in any position, but conventionally they precede the source path.
RUN /bin/bash -c "cmake -DCMAKE_BUILD_TYPE='Release' -DCMAKE_PREFIX_PATH='/OpenMS/contrib-build/;/usr/;/usr/local' -DHAS_XSERVER=OFF -DBOOST_USE_STATIC=OFF -DPYOPENMS=ON -DWITH_UV=OFF -DPYOPENMS_PREPARE_WHEEL_REPAIR=ON -DPython_EXECUTABLE=/root/miniforge3/envs/streamlit-env/bin/python ../OpenMS -DPY_MEMLEAK_DISABLE=On"
# Build TOPP tools and clean up.
# (Only the three OpenSwath tools needed by this app are built, not the full TOPP suite.)
RUN make -j4 OpenSwathAssayGenerator OpenSwathDecoyGenerator OpenSwathWorkflow
RUN rm -rf src doc CMakeFiles
# Build pyOpenMS and produce a repairable wheel using the CMake packaging target.
RUN make -j4 pyopenms
WORKDIR /openms-build/pyOpenMS
# Ensure wheel tooling is available in the build Python, then package and repair the wheel.
# Strategy — three fallbacks, in order:
#   1. Ask CMake's `pyopenms_wheel` target to package a wheel, repair it with auditwheel
#      (vendors the native libs into the wheel), and install the repaired wheel.
#   2. Otherwise install a legacy wheel from pyOpenMS/dist if one exists.
#   3. Otherwise fall back to an editable (development) install.
RUN set -eux; \
PY=/root/miniforge3/envs/streamlit-env/bin/python; \
echo "Installing wheel tooling into build python..."; \
$PY -m pip install --no-cache-dir -U pip build auditwheel py-build-cmake; \
cd /openms-build; \
echo "Invoking CMake pyopenms_wheel target to package wheel (will continue on error)..."; \
cmake --build . --target pyopenms_wheel || true; \
echo "Wheel directory listing (pyopenms_wheels):"; ls -la pyopenms_wheels || true; \
if compgen -G "pyopenms_wheels/*.whl" > /dev/null; then \
echo "Found built wheel(s) in pyopenms_wheels, repairing with auditwheel..."; \
mkdir -p /openms-build/pyopenms_wheels_repaired; \
auditwheel repair -w /openms-build/pyopenms_wheels_repaired pyopenms_wheels/*.whl; \
echo "Repaired wheels:"; ls -la /openms-build/pyopenms_wheels_repaired; \
if compgen -G "/openms-build/pyopenms_wheels_repaired/*.whl" > /dev/null; then \
echo "Installing repaired wheel(s) into build python..."; \
for f in /openms-build/pyopenms_wheels_repaired/*.whl; do $PY -m pip install "$f"; done; \
else \
echo "ERROR: auditwheel did not produce any repaired wheels"; \
ls -la /openms-build/pyopenms_wheels_repaired || true; \
exit 1; \
fi; \
elif compgen -G "pyOpenMS/dist/*.whl" > /dev/null; then \
echo "Found legacy dist wheel, installing..."; \
$PY -m pip install pyOpenMS/dist/*.whl; \
else \
echo "No wheel produced; falling back to development install (editable)"; \
cd /openms-build/pyOpenMS; \
echo "Installing editable pyopenms into build python..."; \
$PY -m pip install -e . --no-cache-dir --no-binary=pyopenms; \
echo "Editable install completed."; \
fi
# Install other Python dependencies (excluding pyopenms, which was installed above
# from the freshly built wheel and must not be overwritten by a PyPI release).
COPY requirements.txt ./requirements.txt
# Strip any pinned `pyopenms` entry. The `|| true` guards the edge case where the
# file contains nothing but pyopenms: grep exits 1 when no lines match, which would
# otherwise fail the whole build.
RUN { grep -Ev '^pyopenms([=<>!~].*)?$' requirements.txt || true; } > requirements_cleaned.txt && mv requirements_cleaned.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
WORKDIR /
RUN mkdir openms
# Copy TOPP tools bin directory, add to PATH.
RUN cp -r openms-build/bin /openms/bin
ENV PATH="/openms/bin/:${PATH}"
# Copy shared libraries, add them to the dynamic linker search path.
RUN cp -r openms-build/lib /openms/lib
ENV LD_LIBRARY_PATH="/openms/lib/:${LD_LIBRARY_PATH}"
# Copy share folder, point OpenMS at it, remove source directory.
RUN cp -r OpenMS/share/OpenMS /openms/share
RUN rm -rf OpenMS
ENV OPENMS_DATA_PATH="/openms/share/"
# Remove build directory.
RUN rm -rf openms-build
# Prepare and run streamlit app.
FROM compile-openms AS run-app
# Install Redis server for job queue and nginx for load balancing
RUN apt-get update && apt-get install -y --no-install-recommends redis-server nginx \
&& rm -rf /var/lib/apt/lists/*
# Create Redis data directory
RUN mkdir -p /var/lib/redis && chown redis:redis /var/lib/redis
# Create workdir and copy over all streamlit related files/folders.
# note: specifying folder with slash as suffix and repeating the folder name seems important to preserve directory structure
WORKDIR /app
COPY assets/ /app/assets
COPY content/ /app/content
COPY docs/ /app/docs
COPY example-data/ /app/example-data
COPY gdpr_consent/ /app/gdpr_consent
COPY hooks/ /app/hooks
COPY src/ /app/src
COPY utils/ /app/utils
COPY app.py /app/app.py
COPY settings.json /app/settings.json
COPY default-parameters.json /app/default-parameters.json
COPY presets.json /app/presets.json
COPY data /app/data
# Set environment variable for Redeem pretrain models
ENV REDEEM_PRETRAINED_MODELS_DIR=/app/data/pretrained_models
# For streamlit configuration
COPY .streamlit/ /app/.streamlit/
COPY clean-up-workspaces.py /app/clean-up-workspaces.py
# add cron job to the crontab
# (runs daily at 03:00 inside the streamlit-env Python; output appended to /app/clean-up-workspaces.log)
RUN echo "0 3 * * * /root/miniforge3/envs/streamlit-env/bin/python /app/clean-up-workspaces.py >> /app/clean-up-workspaces.log 2>&1" | crontab -
# Set default worker count (can be overridden via environment variable)
ENV RQ_WORKER_COUNT=1
ENV REDIS_URL=redis://localhost:6379/0
# Number of Streamlit server instances for load balancing (default: 1 = no load balancer)
# Set to >1 to enable nginx load balancer with multiple Streamlit instances
ENV STREAMLIT_SERVER_COUNT=1
# create entrypoint script to start cron, Redis, RQ workers, and Streamlit
# The script is generated via `echo -e` with embedded \n escapes; the Dockerfile-level
# `\` continuations keep it one instruction. At runtime it:
#   1. activates the streamlit-env environment,
#   2. starts cron (workspace cleanup) and a daemonized Redis, waiting until it answers PING,
#   3. launches $RQ_WORKER_COUNT RQ workers against $REDIS_URL,
#   4. if $STREAMLIT_SERVER_COUNT > 1: writes an nginx config (cookie-sticky
#      `stroute` routing + websocket upgrade headers), starts one Streamlit
#      instance per port from 8510 upward, and execs nginx on 8501;
#      otherwise it execs a single Streamlit instance directly.
# NOTE(review): edit with care — the nginx config is a single escaped string; a stray
# quote or backslash here breaks the container only at runtime, not at build time.
RUN echo -e '#!/bin/bash\n\
set -e\n\
source /root/miniforge3/bin/activate streamlit-env\n\
\n\
# Start cron for workspace cleanup\n\
service cron start\n\
\n\
# Start Redis server in background\n\
echo "Starting Redis server..."\n\
redis-server --daemonize yes --dir /var/lib/redis --appendonly no\n\
\n\
# Wait for Redis to be ready\n\
until redis-cli ping > /dev/null 2>&1; do\n\
echo "Waiting for Redis..."\n\
sleep 1\n\
done\n\
echo "Redis is ready"\n\
\n\
# Start RQ worker(s) in background\n\
WORKER_COUNT=${RQ_WORKER_COUNT:-1}\n\
echo "Starting $WORKER_COUNT RQ worker(s)..."\n\
for i in $(seq 1 $WORKER_COUNT); do\n\
rq worker openms-workflows --url $REDIS_URL --name worker-$i &\n\
done\n\
\n\
# Load balancer setup\n\
SERVER_COUNT=${STREAMLIT_SERVER_COUNT:-1}\n\
\n\
if [ "$SERVER_COUNT" -gt 1 ]; then\n\
echo "Starting $SERVER_COUNT Streamlit instances with nginx load balancer..."\n\
\n\
# Generate nginx upstream block\n\
UPSTREAM_SERVERS=""\n\
BASE_PORT=8510\n\
for i in $(seq 0 $((SERVER_COUNT - 1))); do\n\
PORT=$((BASE_PORT + i))\n\
UPSTREAM_SERVERS="${UPSTREAM_SERVERS} server 127.0.0.1:${PORT};\\n"\n\
done\n\
\n\
# Write nginx config\n\
mkdir -p /etc/nginx\n\
echo -e "worker_processes auto;\\npid /run/nginx.pid;\\n\\nevents {\\n worker_connections 1024;\\n}\\n\\nhttp {\\n client_max_body_size 0;\\n\\n map \\$cookie_stroute \\$route_key {\\n \\x22\\x22 \\$request_id;\\n default \\$cookie_stroute;\\n }\\n\\n upstream streamlit_backend {\\n hash \\$route_key consistent;\\n${UPSTREAM_SERVERS} }\\n\\n map \\$http_upgrade \\$connection_upgrade {\\n default upgrade;\\n \\x27\\x27 close;\\n }\\n\\n server {\\n listen 0.0.0.0:8501;\\n\\n location / {\\n proxy_pass http://streamlit_backend;\\n proxy_http_version 1.1;\\n proxy_set_header Upgrade \\$http_upgrade;\\n proxy_set_header Connection \\$connection_upgrade;\\n proxy_set_header Host \\$host;\\n proxy_set_header X-Real-IP \\$remote_addr;\\n proxy_set_header X-Forwarded-For \\$proxy_add_x_forwarded_for;\\n proxy_set_header X-Forwarded-Proto \\$scheme;\\n proxy_read_timeout 86400;\\n proxy_send_timeout 86400;\\n proxy_buffering off;\\n add_header Set-Cookie \\x22stroute=\\$route_key; Path=/; HttpOnly; SameSite=Lax\\x22 always;\\n }\\n }\\n}" > /etc/nginx/nginx.conf\n\
\n\
# Start Streamlit instances on internal ports\n\
for i in $(seq 0 $((SERVER_COUNT - 1))); do\n\
PORT=$((BASE_PORT + i))\n\
echo "Starting Streamlit instance on port $PORT..."\n\
streamlit run app.py --server.port $PORT --server.address 0.0.0.0 &\n\
done\n\
\n\
sleep 2\n\
echo "Starting nginx load balancer on port 8501..."\n\
exec /usr/sbin/nginx -g "daemon off;"\n\
else\n\
# Single instance mode (default) - run Streamlit directly on port 8501\n\
echo "Starting Streamlit app..."\n\
exec streamlit run app.py --server.address 0.0.0.0\n\
fi\n\
' > /app/entrypoint.sh
# make the script executable
RUN chmod +x /app/entrypoint.sh
# Patch Analytics
# (hooks/hook-analytics.py patches the app; run inside the streamlit-env environment)
RUN mamba run -n streamlit-env python hooks/hook-analytics.py
# Set Online Deployment
# (flip the flag with jq; write to a temp file first because jq cannot edit in place)
RUN jq '.online_deployment = true' settings.json > tmp.json && mv tmp.json settings.json
# Download latest OpenMS App executable as a ZIP file.
# Re-declare stage-scoped ARGs: ARGs declared in the first build stage are NOT in
# scope in this stage, so without these re-declarations ${GITHUB_USER}/${GITHUB_REPO}
# would expand to an empty repo spec and `EXPOSE $PORT` would expand empty.
ARG GITHUB_USER=OpenMS
ARG GITHUB_REPO=streamlit-template
ARG PORT=8501
# GITHUB_TOKEN declared here (not at the top) — otherwise the per-run token busts the cache.
# NOTE(review): build-args remain visible in `docker history`; prefer BuildKit
# `RUN --mount=type=secret` for real secrets. gh authenticates via the GITHUB_TOKEN
# environment variable that this ARG provides during the RUN below.
ARG GITHUB_TOKEN
RUN if [ -n "$GITHUB_TOKEN" ]; then \
    echo "GITHUB_TOKEN is set, proceeding to download the release asset..."; \
    gh release download -R ${GITHUB_USER}/${GITHUB_REPO} -p "OpenMS-App.zip" -D /app; \
    else \
    echo "GITHUB_TOKEN is not set, skipping the release asset download."; \
    fi
# Run app as container entrypoint.
# (EXPOSE documents the port only; publish it with `-p 8501:8501` at run time.)
EXPOSE $PORT
ENTRYPOINT ["/app/entrypoint.sh"]