Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,6 @@ jobs:
"LLAMA_SERVER_VARIANT=cuda"
"BASE_IMAGE=nvidia/cuda:13.0.2-runtime-ubuntu24.04"
"VLLM_VERSION=${{ env.VLLM_VERSION }}"
"VLLM_CUDA_VERSION=cu130"
"VLLM_PYTHON_TAG=cp38-abi3"
"VERSION=${{ env.RELEASE_TAG }}"
push: true
sbom: true
Expand Down
11 changes: 4 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,10 @@ USER modelrunner
# Install uv and vLLM as modelrunner user
RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
&& ~/.local/bin/uv venv --python /usr/bin/python3 /opt/vllm-env \
&& if [ "$TARGETARCH" = "amd64" ]; then \
WHEEL_ARCH="manylinux_2_31_x86_64"; \
WHEEL_URL="https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}%2B${VLLM_CUDA_VERSION}-${VLLM_PYTHON_TAG}-${WHEEL_ARCH}.whl"; \
~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "$WHEEL_URL"; \
else \
~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "vllm==${VLLM_VERSION}"; \
fi
&& printf '%s' "${VLLM_VERSION}" | grep -qE '^(nightly|[0-9]+\.[0-9]+\.[0-9]+|[0-9a-f]{7,40})$' \
|| { echo "Invalid VLLM_VERSION: must be a version (e.g. 0.16.0), 'nightly', or a hex commit hash"; exit 1; } \
&& ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python vllm \
--extra-index-url "https://wheels.vllm.ai/${VLLM_VERSION}/${VLLM_CUDA_VERSION}"

RUN /opt/vllm-env/bin/python -c "import vllm; print(vllm.__version__)" > /opt/vllm-env/version

Expand Down
10 changes: 6 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ LLAMA_SERVER_VERSION := latest
LLAMA_SERVER_VARIANT := cpu
BASE_IMAGE := ubuntu:24.04
VLLM_BASE_IMAGE := nvidia/cuda:13.0.2-runtime-ubuntu24.04
VLLM_VERSION ?= 0.12.0
DOCKER_IMAGE := docker/model-runner:latest
DOCKER_IMAGE_VLLM := docker/model-runner:latest-vllm-cuda
DOCKER_IMAGE_SGLANG := docker/model-runner:latest-sglang
Expand All @@ -19,6 +20,7 @@ DOCKER_BUILD_ARGS := \
--build-arg LLAMA_SERVER_VERSION=$(LLAMA_SERVER_VERSION) \
--build-arg LLAMA_SERVER_VARIANT=$(LLAMA_SERVER_VARIANT) \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg VLLM_VERSION='$(VLLM_VERSION)' \
--target $(DOCKER_TARGET) \
-t $(DOCKER_IMAGE)

Expand Down Expand Up @@ -232,13 +234,13 @@ vllm-metal-dev:
rm -rf "$(VLLM_METAL_INSTALL_DIR)"; \
$$PYTHON_BIN -m venv "$(VLLM_METAL_INSTALL_DIR)"; \
. "$(VLLM_METAL_INSTALL_DIR)/bin/activate" && \
VLLM_VERSION="0.13.0" && \
VLLM_UPSTREAM_VERSION="0.13.0" && \
WORK_DIR=$$(mktemp -d) && \
curl -fsSL -o "$$WORK_DIR/vllm.tar.gz" "https://github.com/vllm-project/vllm/releases/download/v$$VLLM_VERSION/vllm-$$VLLM_VERSION.tar.gz" && \
curl -fsSL -o "$$WORK_DIR/vllm.tar.gz" "https://github.com/vllm-project/vllm/releases/download/v$$VLLM_UPSTREAM_VERSION/vllm-$$VLLM_UPSTREAM_VERSION.tar.gz" && \
tar -xzf "$$WORK_DIR/vllm.tar.gz" -C "$$WORK_DIR" && \
pip install -r "$$WORK_DIR/vllm-$$VLLM_VERSION/requirements/cpu.txt" && \
pip install -r "$$WORK_DIR/vllm-$$VLLM_UPSTREAM_VERSION/requirements/cpu.txt" && \
pip install -e "$(VLLM_METAL_PATH)" && \
pip install -r "$$WORK_DIR/vllm-$$VLLM_VERSION/requirements/common.txt" && \
pip install -r "$$WORK_DIR/vllm-$$VLLM_UPSTREAM_VERSION/requirements/common.txt" && \
rm -rf "$$WORK_DIR" && \
echo "dev" > "$(VLLM_METAL_INSTALL_DIR)/.vllm-metal-version"; \
echo "vllm-metal dev installed from $(VLLM_METAL_PATH)"
Expand Down