From 7f38801a270fec2b53eb58ba8ee891282aff96f5 Mon Sep 17 00:00:00 2001 From: ArnabChatterjee20k Date: Wed, 27 May 2026 13:56:25 +0530 Subject: [PATCH 1/4] feat: add warmup binary for build-time model caching and update Docker configuration --- Dockerfile | 13 +++++++++++-- docker-compose.yml | 7 ++++++- src/bin/warmup.rs | 21 +++++++++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 src/bin/warmup.rs diff --git a/Dockerfile b/Dockerfile index 0f231c1..ab81318 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,8 +10,9 @@ WORKDIR /app COPY . . RUN --mount=type=cache,target=/usr/local/cargo/registry \ --mount=type=cache,target=/app/target \ - cargo build --release --bin embedding && \ - cp target/release/embedding /usr/local/bin/embedding + cargo build --release --bin embedding --bin warmup && \ + cp target/release/embedding /usr/local/bin/embedding && \ + cp target/release/warmup /usr/local/bin/warmup FROM debian:trixie-slim AS runtime RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -27,6 +28,14 @@ USER embedder WORKDIR /home/embedder COPY --from=builder /usr/local/bin/embedding /usr/local/bin/embedding +COPY --from=builder /usr/local/bin/warmup /usr/local/bin/warmup + +# Download models at build time so the image ships with them cached. Override +# the model set with `--build-arg EMBEDDING_MODELS=...` (docker compose passes +# this from .env). Pool size is forced to 1 to keep the build's memory low — +# it only affects the warmup, not the runtime pool. +ARG EMBEDDING_MODELS=nomic,bge-small +RUN EMBEDDING_MODELS="${EMBEDDING_MODELS}" EMBEDDING_POOL_SIZE=1 /usr/local/bin/warmup EXPOSE 3000 diff --git a/docker-compose.yml b/docker-compose.yml index c3b3ca1..84dfd48 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,6 +3,8 @@ services: build: context: . dockerfile: Dockerfile + args: + EMBEDDING_MODELS: ${EMBEDDING_MODELS:-nomic,bge-small} image: embedding:latest container_name: embedding ports: @@ -10,5 +12,8 @@ services: env_file: - .env volumes: - - ./models:/home/embedder/models + - models:/home/embedder/models restart: unless-stopped + +volumes: + models: diff --git a/src/bin/warmup.rs b/src/bin/warmup.rs new file mode 100644 index 0000000..cec8528 --- /dev/null +++ b/src/bin/warmup.rs @@ -0,0 +1,21 @@ +use embedding::{EmbeddingClient, EmbeddingConfig}; + +/// Build-time warmup: resolves the configured models from the environment and +/// constructs the client, which downloads each model into the cache dir. Run +/// during `docker build` so the image ships with models already cached instead +/// of fetching them on first request. +fn main() -> Result<(), Box> { + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")), + ) + .init(); + + let config = EmbeddingConfig::from_env(); + tracing::info!("warmup: downloading and initializing {} model(s)", config.models.len()); + let _ = EmbeddingClient::new(config)?; + tracing::info!("warmup: models cached and ready"); + + Ok(()) +} From 20d814dda5686b8ab861e0faca36a0fcda24c3eb Mon Sep 17 00:00:00 2001 From: ArnabChatterjee20k Date: Wed, 27 May 2026 14:01:44 +0530 Subject: [PATCH 2/4] updated --- src/bin/warmup.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/bin/warmup.rs b/src/bin/warmup.rs index cec8528..4f3a348 100644 --- a/src/bin/warmup.rs +++ b/src/bin/warmup.rs @@ -1,9 +1,5 @@ use embedding::{EmbeddingClient, EmbeddingConfig}; -/// Build-time warmup: resolves the configured models from the environment and -/// constructs the client, which downloads each model into the cache dir. Run -/// during `docker build` so the image ships with models already cached instead -/// of fetching them on first request. fn main() -> Result<(), Box> { tracing_subscriber::fmt() .with_env_filter( @@ -13,7 +9,10 @@ fn main() -> Result<(), Box> { .init(); let config = EmbeddingConfig::from_env(); - tracing::info!("warmup: downloading and initializing {} model(s)", config.models.len()); + tracing::info!( + "warmup: downloading and initializing {} model(s)", + config.models.len() + ); let _ = EmbeddingClient::new(config)?; tracing::info!("warmup: models cached and ready"); From 2646f22580dd516d7a3f31555740d0c6ed9988ec Mon Sep 17 00:00:00 2001 From: ArnabChatterjee20k Date: Wed, 27 May 2026 14:07:37 +0530 Subject: [PATCH 3/4] feat: enhance warmup inference error handling and add dimension for EmbeddingGemma300M --- src/embedding.rs | 8 ++++++-- src/model.rs | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/embedding.rs b/src/embedding.rs index a40e844..22f0208 100644 --- a/src/embedding.rs +++ b/src/embedding.rs @@ -141,8 +141,12 @@ impl EmbeddingClient { // Run a warmup inference so the ONNX Runtime arena is allocated before // we measure memory. Without this, per_instance only captures model - // weights and misses the arena buffers. - let _ = first_model.embed(vec!["warmup"], None); + // weights and misses the arena buffers. A failure here means the model + // can't serve requests at all, so fail loudly — swallowing it would let + // the memory delta read ~0 and silently mis-size the pool (no OOM guard). + first_model + .embed(vec!["warmup"], None) + .map_err(|e| format!("warmup inference failed for {}: {}", model_name, e))?; sys.refresh_memory(); let memory_after_loading_model = sys.available_memory(); diff --git a/src/model.rs b/src/model.rs index a307799..0fc3267 100644 --- a/src/model.rs +++ b/src/model.rs @@ -18,6 +18,7 @@ pub fn dimension(model: &EmbeddingModel) -> usize { EmbeddingModel::BGESmallENV15 => 384, EmbeddingModel::BGEBaseENV15 => 768, EmbeddingModel::BGELargeENV15 => 1024, + EmbeddingModel::EmbeddingGemma300M => 768, _ => 768, } } @@ -43,5 +44,6 @@ mod tests { assert_eq!(dimension(&EmbeddingModel::AllMiniLML6V2), 384); assert_eq!(dimension(&EmbeddingModel::NomicEmbedTextV15), 768); assert_eq!(dimension(&EmbeddingModel::BGELargeENV15), 1024); + assert_eq!(dimension(&EmbeddingModel::EmbeddingGemma300M), 768); } } From 2a23ec290725626e6e92a351acaf42c89be2f292 Mon Sep 17 00:00:00 2001 From: ArnabChatterjee20k Date: Wed, 27 May 2026 14:20:12 +0530 Subject: [PATCH 4/4] feat: add publish workflow for DockerHub deployment --- .github/workflows/ci.yml | 12 +++++----- .github/workflows/publish.yml | 45 +++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/publish.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5f6edc8..90db1e4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,13 +18,13 @@ jobs: name: fmt + clippy + test runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - uses: dtolnay/rust-toolchain@stable + - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable with: components: rustfmt, clippy - - uses: Swatinem/rust-cache@v2 + - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1 - name: cargo fmt run: cargo fmt --all -- --check @@ -41,12 +41,12 @@ jobs: name: docker build runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - uses: docker/setup-buildx-action@v3 + - uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0 - name: Build image - uses: docker/build-push-action@v6 + uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0 with: context: . push: false diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..99713bb --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,45 @@ +name: Build and Deploy to DockerHub + +on: + push: + tags: + - '*' + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Set up QEMU + uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0 + + - name: Login to Docker Hub + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@80c7e94dd9b9319bd5eb7a0e0fe9291e23a2a2e9 # v6.1.0 + with: + images: appwrite/embedding + tags: | + type=semver,pattern={{major}}.{{minor}}.{{patch}} + type=match,pattern=.*RC.*,group=0 + + - name: Build and push + uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0 + with: + context: . + platforms: linux/amd64,linux/arm64 + build-args: | + VERSION=${{ steps.meta.outputs.version }} + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} \ No newline at end of file