Skip to content

Commit c57c1af

Browse files
committed
Add a worker process to archive PDFs
1 parent 6a97038 commit c57c1af

23 files changed

Lines changed: 982 additions & 66 deletions

.devcontainer/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Ignore host-specific extensions to the Docker Compose dev stack.
2+
docker-compose.extend.yml

.devcontainer/Dockerfile

Lines changed: 0 additions & 18 deletions
This file was deleted.

.devcontainer/devcontainer.json

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,24 @@
11
{
22
"name": "Archivist",
3-
"dockerComposeFile": "docker-compose.yml",
3+
"dockerComposeFile": [
4+
"docker-compose.yml",
5+
"docker-compose.extend.yml"
6+
],
7+
"remoteUser": "vscode",
48
"service": "archivist",
59
"workspaceFolder": "/workspace",
610
"customizations": {
711
"vscode": {
812
"settings": {
13+
"editor.formatOnSave": true,
14+
"editor.rulers": [
15+
98
16+
],
917
"terminal.integrated.defaultProfile.linux": "zsh"
1018
},
1119
"extensions": [
1220
"eamodio.gitlens",
13-
"jakebecker.elixir-ls",
21+
"JakeBecker.elixir-ls",
1422
"ms-azuretools.vscode-docker"
1523
]
1624
}

.devcontainer/docker-compose.yml

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ services:
22
archivist:
33
build:
44
context: .
5-
dockerfile: Dockerfile
5+
dockerfile: ../Dockerfile
6+
target: devcontainer
67
volumes:
78
- ..:/workspace:cached
89
# The container runs as the non-root "vscode" user, so persist the history directory in that user's home.
- shell_histories:/home/vscode/.cache/erlang-history
@@ -11,20 +12,11 @@ services:
1112
network_mode: service:ollama
1213

1314
ollama:
14-
image: ollama/ollama:0.5.5
15-
environment:
16-
POSTGRES_PASSWORD: postgres
15+
image: ollama/ollama:0.5.7
1716
ports:
1817
- "11434:11434"
1918
volumes:
2019
- ollama_data:/root/.ollama
21-
deploy:
22-
resources:
23-
reservations:
24-
devices:
25-
- driver: nvidia
26-
count: 1
27-
capabilities: [gpu]
2820

2921
volumes:
3022
shell_histories:

.github/workflows/elixir.yaml

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
name: Elixir CI
2+
3+
on:
4+
push:
5+
branches: ["main"]
6+
pull_request:
7+
workflow_dispatch:
8+
9+
env:
10+
MIX_ENV: test
11+
12+
permissions:
13+
contents: read
14+
packages: write
15+
16+
jobs:
17+
build:
18+
name: Build
19+
runs-on: ubuntu-latest
20+
steps:
21+
- name: Checkout code
22+
uses: actions/checkout@v4
23+
with:
24+
fetch-depth: 0
25+
26+
- name: Restore file modification timestamps
27+
uses: chetan/git-restore-mtime-action@v2
28+
29+
- name: Build the Docker image
30+
run: |
31+
# Feed the token through stdin so it never appears in the process argument list.
echo "${{ github.token }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin
32+
33+
image="ghcr.io/${{ github.repository }}"
34+
sha_tag="${image}:${{ github.sha }}"
35+
# Docker tags cannot contain "/" (e.g. "feature/foo"), so map every disallowed character to "-".
branch="$(printf '%s' "${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" | tr -c 'A-Za-z0-9_.-' '-')"
36+
branch_tag="${image}:${branch}"
37+
38+
builder_tag="${branch_tag}_builder"
39+
main_builder_tag="${image}:main_builder"
40+
41+
main_tag="${image}:main"
42+
43+
docker buildx build --push --tag $builder_tag \
44+
--target builder \
45+
--cache-to type=inline \
46+
--cache-from $main_builder_tag \
47+
--cache-from $builder_tag \
48+
.
49+
50+
docker buildx build --push --tag $branch_tag --tag $sha_tag \
51+
--target runner \
52+
--cache-to type=inline \
53+
--cache-from $builder_tag \
54+
--cache-from $main_tag \
55+
--cache-from $branch_tag \
56+
.
57+
58+
build_devcontainer:
59+
name: Build devcontainer
60+
runs-on: ubuntu-latest
61+
outputs:
62+
image: ${{ steps.build_docker_image.outputs.image }}
63+
steps:
64+
- name: Checkout code
65+
uses: actions/checkout@v4
66+
with:
67+
fetch-depth: 0
68+
69+
- name: Restore file modification timestamps
70+
uses: chetan/git-restore-mtime-action@v2
71+
72+
- id: build_docker_image
73+
name: Build the Docker image
74+
run: |
75+
# Feed the token through stdin so it never appears in the process argument list.
echo "${{ github.token }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin
76+
77+
image="ghcr.io/${{ github.repository }}"
78+
sha_tag="${image}:${{ github.sha }}"
79+
# Docker tags cannot contain "/" (e.g. "feature/foo"), so map every disallowed character to "-".
branch="$(printf '%s' "${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" | tr -c 'A-Za-z0-9_.-' '-')"
80+
branch_tag="${image}:${branch}"
81+
82+
devcontainer_tag="${branch_tag}_devcontainer"
83+
main_devcontainer_tag="${image}:main_devcontainer"
84+
85+
docker buildx build --push --tag $devcontainer_tag \
86+
--target devcontainer \
87+
--build-arg USER_UID=1001 \
88+
--cache-to type=inline \
89+
--cache-from $main_devcontainer_tag \
90+
--cache-from $devcontainer_tag \
91+
.
92+
93+
echo "image=${devcontainer_tag}" >> "$GITHUB_OUTPUT"
94+
95+
test:
96+
name: Test
97+
runs-on: ubuntu-latest
98+
needs: build_devcontainer
99+
container:
100+
image: ${{ needs.build_devcontainer.outputs.image }}
101+
credentials:
102+
username: ${{ github.actor }}
103+
password: ${{ github.token }}
104+
options: --user 1001
105+
steps:
106+
- name: Checkout code
107+
uses: actions/checkout@v4
108+
with:
109+
fetch-depth: 0
110+
111+
- name: Restore file modification timestamps
112+
uses: chetan/git-restore-mtime-action@v2
113+
114+
- name: Cache deps
115+
id: cache-deps
116+
uses: actions/cache@v4
117+
env:
118+
cache-name: cache-elixir-deps
119+
with:
120+
path: deps
121+
key: ${{ runner.os }}-mix-${{ env.cache-name }}-${{ hashFiles('**/mix.lock') }}
122+
restore-keys: |
123+
${{ runner.os }}-mix-${{ env.cache-name }}-
124+
125+
- name: Cache compiled build
126+
id: cache-build
127+
uses: actions/cache@v4
128+
env:
129+
cache-name: cache-compiled-build
130+
with:
131+
path: _build
132+
key: ${{ runner.os }}-mix-${{ env.cache-name }}-${{ hashFiles('**/mix.lock') }}
133+
restore-keys: |
134+
${{ runner.os }}-mix-${{ env.cache-name }}-
135+
${{ runner.os }}-mix-
136+
137+
- name: Clean to rule out incremental build as a source of flakiness
138+
if: github.run_attempt != '1'
139+
run: |
140+
mix deps.clean --all
141+
mix clean
142+
shell: sh
143+
144+
- name: Install dependencies
145+
run: mix deps.get
146+
147+
- name: Compiles without warnings
148+
run: mix compile --warnings-as-errors
149+
150+
- name: Check Formatting
151+
run: mix format --check-formatted
152+
153+
- name: Check for retired dependencies
154+
run: mix hex.audit
155+
156+
- name: Check for unused dependencies
157+
run: mix deps.unlock --check-unused
158+
159+
- name: Run tests
160+
run: mix test

.tool-versions

Lines changed: 0 additions & 2 deletions
This file was deleted.

Dockerfile

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
ARG ELIXIR_VERSION=1.18.2
2+
ARG OTP_VERSION=27.2.1
3+
ARG DEBIAN_VERSION=bookworm-20250113-slim
4+
5+
ARG BUILDER_IMAGE="hexpm/elixir:${ELIXIR_VERSION}-erlang-${OTP_VERSION}-debian-${DEBIAN_VERSION}"
6+
ARG RUNNER_IMAGE="debian:${DEBIAN_VERSION}"
7+
8+
FROM ${BUILDER_IMAGE} AS devcontainer
9+
10+
RUN apt-get update && apt-get install -y \
11+
curl \
12+
git \
13+
gnupg2 \
14+
ocrmypdf \
15+
poppler-utils \
16+
sudo \
17+
zsh
18+
19+
ARG USERNAME=vscode
20+
ARG USER_UID=1000
21+
ARG USER_GID=$USER_UID
22+
23+
# Add vscode user
24+
RUN groupadd --gid $USER_GID $USERNAME \
25+
&& useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \
26+
&& echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \
27+
&& chmod 0440 /etc/sudoers.d/$USERNAME
28+
29+
USER $USERNAME
30+
31+
# Let the BEAM change its clock when the system time changes.
32+
ENV ERL_FLAGS="+C multi_time_warp"
33+
34+
# Enable history in IEX.
35+
ENV ERL_AFLAGS="-kernel shell_history enabled"
36+
37+
# Install Oh My Zsh
38+
RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)"
39+
40+
# Co-locate the zsh history with IEx shell history for convenience
41+
RUN mkdir -p /home/${USERNAME}/.cache/erlang-history \
42+
&& SNIPPET="export HISTFILE=/home/${USERNAME}/.cache/erlang-history/.zsh_history" \
43+
&& echo "$SNIPPET" >> "/home/${USERNAME}/.zshrc"
44+
45+
RUN mix local.hex --force && mix local.rebar --force
46+
47+
WORKDIR /workspace
48+
49+
50+
FROM ${BUILDER_IMAGE} AS builder
51+
52+
# install build dependencies
53+
RUN apt-get update -y && apt-get install -y build-essential git \
54+
&& apt-get clean && rm -f /var/lib/apt/lists/*_*
55+
56+
# prepare build dir
57+
WORKDIR /app
58+
59+
# install hex + rebar
60+
RUN mix local.hex --force && \
61+
mix local.rebar --force
62+
63+
# set build ENV
64+
ENV MIX_ENV="prod"
65+
66+
# install mix dependencies
67+
COPY mix.exs mix.lock ./
68+
RUN mix deps.get --only $MIX_ENV
69+
RUN mkdir config
70+
71+
# copy compile-time config files before we compile dependencies to ensure any relevant config
72+
# change will trigger the dependencies to be re-compiled.
73+
COPY config/config.exs config/${MIX_ENV}.exs config/
74+
RUN mix deps.compile
75+
76+
COPY lib ./lib
77+
78+
# Compile the release
79+
RUN mix compile
80+
81+
# Changes to config/runtime.exs don't require recompiling the code
82+
COPY config/runtime.exs config/
83+
84+
RUN mix release
85+
86+
87+
FROM ${RUNNER_IMAGE} AS runner
88+
89+
RUN apt-get update -y && apt-get install -y \
90+
ca-certificates \
91+
libncurses5 \
92+
libstdc++6 \
93+
locales \
94+
ocrmypdf \
95+
openssl \
96+
poppler-utils \
97+
&& apt-get clean && rm -f /var/lib/apt/lists/*_*
98+
99+
# Set the locale
100+
RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen
101+
102+
ENV LANG="en_US.UTF-8"
103+
ENV LANGUAGE="en_US:en"
104+
ENV LC_ALL="en_US.UTF-8"
105+
106+
WORKDIR "/app"
107+
RUN chown nobody /app
108+
109+
# set runner ENV
110+
ENV MIX_ENV="prod"
111+
112+
# Only copy the final release from the build stage
113+
COPY --from=builder --chown=nobody:root /app/_build/${MIX_ENV}/rel/archivist ./
114+
115+
USER nobody
116+
117+
CMD ["/app/bin/archivist", "start"]

config/config.exs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import Config
2+
3+
ollama_timeout_seconds =
4+
String.to_integer(System.get_env("ARCHIVIST_OLLAMA_TIMEOUT_SECONDS", "60"))
5+
6+
config :archivist, :ollama,
7+
base_url: System.get_env("ARCHIVIST_OLLAMA_BASE_URL", "http://localhost:11434/api"),
8+
receive_timeout: to_timeout(second: ollama_timeout_seconds)
9+
10+
# Import environment specific config. This must remain at the bottom of this file so it overrides
11+
# the configuration defined above.
12+
import_config "#{config_env()}.exs"

config/dev.exs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import Config
2+
3+
config :archivist,
4+
worker: [
5+
archive: "/archive",
6+
check_interval: to_timeout(minute: 1),
7+
inbox: "/inbox",
8+
llm_timeout: to_timeout(minute: 5)
9+
]

config/prod.exs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
import Config

0 commit comments

Comments
 (0)