Skip to content

Commit 33eeb1f

Browse files
authored
chore(medcat-trainer): update deps (#325)
* chore(medcat-trainer): use uv, pyproject.toml, upgrade docker python, upgrade django, django-polymorphic * fix(medcat-trainer): review comments * fix(medcat-trainer): review comments * fix(medcat-trainer): review comment * fix(medcat-trainer): dockerfile comment --------- Co-authored-by: Tom Searle <tom@cogstack.org>
1 parent 340284f commit 33eeb1f

10 files changed

Lines changed: 3595 additions & 49 deletions

File tree

.github/workflows/medcat-trainer_ci.yml

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,12 @@ jobs:
100100
with:
101101
ref: ${{ github.ref }}
102102

103-
- name: Set up Python
104-
uses: actions/setup-python@v6
103+
- name: Install uv for Python 3.12
104+
uses: astral-sh/setup-uv@v7
105105
with:
106-
python-version: "3.11"
106+
python-version: "3.12"
107+
enable-cache: true
108+
cache-dependency-glob: "medcat-trainer/webapp/uv.lock"
107109

108110
- name: Install system dependencies
109111
run: |
@@ -115,15 +117,21 @@ jobs:
115117
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
116118
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
117119
118-
- name: Install Python dependencies
120+
- name: Install Python dependencies with uv
119121
run: |
120-
python -m pip install --upgrade pip
121122
cd webapp
122-
pip install -r requirements.txt
123+
uv sync --frozen
124+
125+
- name: Ensure pip inside uv environment
126+
run: |
127+
cd webapp
128+
uv run python -m ensurepip --upgrade
129+
uv run python -m pip install --upgrade pip
123130
124131
- name: Download spaCy model
125132
run: |
126-
python -m spacy download en_core_web_md
133+
cd webapp
134+
uv run python -m spacy download en_core_web_md
127135
128136
- name: Run Django tests
129137
env:
@@ -132,7 +140,7 @@ jobs:
132140
DEBUG: 1
133141
run: |
134142
cd webapp/api
135-
python manage.py test
143+
uv run python manage.py test
136144
137145
# Build and test webapp container
138146
build-and-push:
@@ -190,7 +198,7 @@ jobs:
190198
- name: Run Django Tests
191199
run: |
192200
# run tests using the built image
193-
docker run --rm cogstacksystems/medcat-trainer:test python manage.py test
201+
docker run --rm cogstacksystems/medcat-trainer:test /home/.venv/bin/python manage.py test
194202
195203
- name: Push Docker MedCATtrainer image
196204
id: docker_build_push

medcat-trainer/docs/installation.md

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,16 @@ The above commands runs the latest release of MedCATtrainer, if you'd prefer to
3131
$ docker-compose -f docker-compose-dev.yml up
3232
```
3333

34+
The webapp Python dependencies are managed with **uv** and **pyproject.toml** (see `medcat-trainer/webapp/pyproject.toml`). To install locally for development:
35+
36+
```shell
37+
$ cd medcat-trainer/webapp
38+
$ uv sync --no-install-project
39+
$ uv run python api/manage.py runserver
40+
```
41+
42+
To add or update dependencies, run `uv add <package> && uv lock`; commit the resulting `uv.lock` for reproducible Docker builds.
43+
3444
To change environment variables, such as the exposed host ports and language of spaCy model, use:
3545
```shell
3646
$ cp .env-example .env
@@ -111,11 +121,11 @@ Currently, there are two roles that can be assigned to users:
111121

112122

113123
### (Optional) Postgres Database Support
114-
MedCAT trainer defaults to a local SQLite database, which is suitable for single-user or small-scale setups.
124+
MedCAT trainer defaults to a local SQLite database, which is suitable for single-user or small-scale setups.
115125

116126
For larger deployments, or to support multiple replicas of the app for example in Kubernetes, you may want to run a postgresql database.
117127

118-
You can optionally use a postgresql database instead by setting the following env variables.
128+
You can optionally use a postgresql database instead by setting the following env variables.
119129

120130
|Parameter|Description|
121131
|---------|-----------|
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Don't copy host venv — image builds its own in Dockerfile.
2+
# Copying it would overwrite the Linux venv with host (e.g. macOS) binaries.
3+
.venv/
4+
.venv
5+
__pycache__/
6+
*.pyc
7+
.git
8+
.gitignore
9+
*.md
10+
.pytest_cache
11+
.mypy_cache
12+
node_modules/

medcat-trainer/webapp/Dockerfile

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM python:3.11
1+
FROM python:3.12
22

33
# Update and upgrade everything
44
RUN apt-get update -y && \
@@ -28,16 +28,18 @@ WORKDIR /home/frontend
2828
COPY frontend/package.json frontend/package-lock.json ./
2929
RUN npm install
3030

31-
# Install Python dependencies
31+
# Install uv and Python dependencies
3232
WORKDIR /home
33-
COPY requirements.txt ./
34-
RUN pip install pip --upgrade
35-
RUN pip install "setuptools>=81.0.0,<82"
36-
RUN pip install --no-cache-dir -r requirements.txt
33+
COPY pyproject.toml uv.lock* ./
34+
RUN pip install uv \
35+
&& uv sync --frozen --no-install-project
36+
37+
# Ensure venv has pip (uv venvs don't include it; spacy download needs it)
38+
RUN uv run python -m ensurepip --upgrade
3739

3840
# Download spaCy models (only requires spaCy, not application code)
3941
ARG SPACY_MODELS="en_core_web_md"
40-
RUN for SPACY_MODEL in ${SPACY_MODELS}; do python -m spacy download ${SPACY_MODEL}; done
42+
RUN for SPACY_MODEL in ${SPACY_MODELS}; do uv run python -m spacy download ${SPACY_MODEL}; done
4143

4244
# Copy rest of project
4345
WORKDIR /home
@@ -52,7 +54,7 @@ RUN chmod u+x /home/scripts/entry.sh && \
5254
chmod u+x /home/scripts/crontab && cp /home/scripts/crontab /etc/crontab && \
5355
chmod a+x /home/scripts/run.sh && \
5456
chmod a+x /home/scripts/run-bg-process.sh && \
55-
chmod u+x /home/scripts/nginx-entrypoint.sh
56-
57+
chmod a+x /home/scripts/nginx-entrypoint.sh
58+
5759
WORKDIR /home/api/
5860

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Generated by Django 5.2.11 on 2026-02-10 00:10
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('api', '0093_add_remote_model_service_fields'),
10+
]
11+
12+
operations = [
13+
migrations.AlterModelOptions(
14+
name='project',
15+
options={},
16+
),
17+
migrations.AlterModelOptions(
18+
name='projectannotateentities',
19+
options={},
20+
),
21+
migrations.AlterField(
22+
model_name='projectannotateentities',
23+
name='use_model_service',
24+
field=models.BooleanField(default=False, help_text='Use a remote MedCAT service API for document processing instead of local models(note: interim model training is not supported for remote model service projects)'),
25+
),
26+
migrations.AlterField(
27+
model_name='projectgroup',
28+
name='use_model_service',
29+
field=models.BooleanField(default=False, help_text='Use a remote MedCAT service API for document processing instead of local models(note: interim model training is not supported for remote model service projects)'),
30+
),
31+
]
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
[project]
2+
name = "medcattrainer-webapp"
3+
version = "1.0.0"
4+
description = "MedCATtrainer Django web application"
5+
requires-python = ">=3.10"
6+
license = { text = "Apache-2.0" }
7+
dependencies = [
8+
"scipy>=1.7.0,<1.14; python_version < '3.13'", # pin for py<3.13 so macOS arm64 gets pre-built wheels
9+
"uwsgi>=2.0,<3",
10+
"Django>=5.2,<6",
11+
"django-filter>=25.2",
12+
"django-polymorphic>=4.0,<5", # 4.x uses importlib.metadata, no pkg_resources
13+
"djangorestframework>=3.16,<4",
14+
"django-background-tasks-updated>=1.2",
15+
"openpyxl>=3.1",
16+
"medcat[meta-cat,spacy,rel-cat,deid]>=2.3",
17+
"psycopg[binary,pool]>=3.2",
18+
"cryptography>=45",
19+
"drf-oidc-auth>=3.0",
20+
"django-health-check>=3.22",
21+
"requests>=2.31",
22+
]
23+
24+
[tool.uv]
25+
extra-index-url = ["https://download.pytorch.org/whl/cpu/"]
26+
index-strategy = "unsafe-best-match"

medcat-trainer/webapp/requirements.txt

Lines changed: 0 additions & 14 deletions
This file was deleted.
Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
#!/bin/sh
22

3+
# Use uv-managed project environment (no manual venv activation)
4+
export UV_PROJECT=/home
5+
36
# env vars that should only be on for app running...
47
export RESUBMIT_ALL_ON_STARTUP=0
58

69
# Collect static files and migrate if needed
7-
python /home/api/manage.py collectstatic --noinput
8-
python /home/api/manage.py makemigrations --noinput
9-
python /home/api/manage.py makemigrations api --noinput
10-
python /home/api/manage.py migrate --noinput
11-
python /home/api/manage.py migrate api --noinput
10+
uv run python /home/api/manage.py collectstatic --noinput
11+
uv run python /home/api/manage.py makemigrations --noinput
12+
uv run python /home/api/manage.py makemigrations api --noinput
13+
uv run python /home/api/manage.py migrate --noinput
14+
uv run python /home/api/manage.py migrate api --noinput
1215

13-
python /home/api/manage.py process_tasks --log-std
16+
uv run python /home/api/manage.py process_tasks --log-std

medcat-trainer/webapp/scripts/run.sh

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
#!/bin/sh
22
echo "Starting medcat trainer"
33

4+
# Use uv-managed project environment (no manual venv activation)
5+
export UV_PROJECT=/home
6+
47
# run db backup script before doing anything
58
/home/scripts/backup_db.sh
69

@@ -9,11 +12,11 @@ TMP_RESUBMIT_ALL_VAR=$RESUBMIT_ALL_ON_STARTUP
912
export RESUBMIT_ALL_ON_STARTUP=0
1013

1114
# Collect static files and migrate if needed
12-
python /home/api/manage.py collectstatic --noinput
13-
python /home/api/manage.py makemigrations --noinput
14-
python /home/api/manage.py makemigrations api --noinput
15-
python /home/api/manage.py migrate --noinput
16-
python /home/api/manage.py migrate api --noinput
15+
uv run python /home/api/manage.py collectstatic --noinput
16+
uv run python /home/api/manage.py makemigrations --noinput
17+
uv run python /home/api/manage.py makemigrations api --noinput
18+
uv run python /home/api/manage.py migrate --noinput
19+
uv run python /home/api/manage.py migrate api --noinput
1720

1821
# Generates the runtime configuration for the web app and copies it to the static directory for web access
1922
/home/scripts/nginx-entrypoint.sh
@@ -30,15 +33,15 @@ if not User.objects.filter(username=admin_username).exists():
3033
User.objects.create_superuser(admin_username, admin_email, admin_password)
3134
" | python manage.py shell
3235

33-
if [ $LOAD_EXAMPLES ]; then
36+
if [ $LOAD_EXAMPLES ]; then
3437
echo "Loading examples..."
35-
python /home/scripts/load_examples.py >> /dev/stdout 2>> /dev/stderr &
38+
uv run python /home/scripts/load_examples.py >> /dev/stdout 2>> /dev/stderr &
3639
fi
3740

3841
# Creating a default user group that can manage projects and annotate but not delete
39-
python manage.py shell < /home/scripts/create_group.py
42+
(cd /home/api && uv run python manage.py shell < /home/scripts/create_group.py)
4043

4144
# Reset any env vars to their original state
4245
export RESUBMIT_ALL_ON_STARTUP=$TMP_RESUBMIT_ALL_VAR
4346

44-
uwsgi --http-timeout 360s --http :8000 --master --chdir /home/api/ --module core.wsgi
47+
exec uv run uwsgi --http-timeout 360s --http :8000 --master --chdir /home/api/ --module core.wsgi

0 commit comments

Comments
 (0)