diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3c5108ed..096674d6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -34,7 +34,7 @@ on: description: "vLLM version" required: false type: string - default: "0.12.0" + default: "0.17.0" sglangVersion: description: "SGLang version" required: false @@ -230,7 +230,7 @@ jobs: env: RELEASE_TAG: ${{ needs.prepare.outputs.release_tag }} LLAMA_SERVER_VERSION: ${{ inputs.llamaServerVersion || 'latest' }} - VLLM_VERSION: ${{ inputs.vllmVersion || '0.12.0' }} + VLLM_VERSION: ${{ inputs.vllmVersion || '0.17.0' }} SGLANG_VERSION: ${{ inputs.sglangVersion || '0.4.0' }} PUSH_LATEST: ${{ inputs.pushLatest || 'false' }} BUILD_MUSA_CANN: ${{ inputs.buildMusaCann || 'false' }} @@ -340,8 +340,6 @@ jobs: "LLAMA_SERVER_VARIANT=cuda" "BASE_IMAGE=nvidia/cuda:13.0.2-runtime-ubuntu24.04" "VLLM_VERSION=${{ env.VLLM_VERSION }}" - "VLLM_CUDA_VERSION=cu130" - "VLLM_PYTHON_TAG=cp38-abi3" "VERSION=${{ env.RELEASE_TAG }}" push: true sbom: true diff --git a/Dockerfile b/Dockerfile index 0c62726c..59d6f660 100644 --- a/Dockerfile +++ b/Dockerfile @@ -90,7 +90,7 @@ ENTRYPOINT ["/app/model-runner"] # --- vLLM variant --- FROM llamacpp AS vllm -ARG VLLM_VERSION=0.12.0 +ARG VLLM_VERSION=0.17.0 ARG VLLM_CUDA_VERSION=cu130 ARG VLLM_PYTHON_TAG=cp38-abi3 ARG TARGETARCH @@ -106,13 +106,10 @@ USER modelrunner # Install uv and vLLM as modelrunner user RUN curl -LsSf https://astral.sh/uv/install.sh | sh \ && ~/.local/bin/uv venv --python /usr/bin/python3 /opt/vllm-env \ - && if [ "$TARGETARCH" = "amd64" ]; then \ - WHEEL_ARCH="manylinux_2_31_x86_64"; \ - WHEEL_URL="https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}%2B${VLLM_CUDA_VERSION}-${VLLM_PYTHON_TAG}-${WHEEL_ARCH}.whl"; \ - ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "$WHEEL_URL"; \ - else \ - ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "vllm==${VLLM_VERSION}"; \ - fi + && printf 
'%s' "${VLLM_VERSION}" | grep -qE '^(nightly|[0-9]+\.[0-9]+\.[0-9]+|[0-9a-f]{7,40})$' \ + || { echo "Invalid VLLM_VERSION: must be a version (e.g. 0.17.0), 'nightly', or a hex commit hash"; exit 1; } \ + && ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python vllm \ + --extra-index-url "https://wheels.vllm.ai/${VLLM_VERSION}/${VLLM_CUDA_VERSION}" RUN /opt/vllm-env/bin/python -c "import vllm; print(vllm.__version__)" > /opt/vllm-env/version diff --git a/Makefile b/Makefile index 4d9435cc..8ceb495d 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,7 @@ LLAMA_SERVER_VERSION := latest LLAMA_SERVER_VARIANT := cpu BASE_IMAGE := ubuntu:24.04 VLLM_BASE_IMAGE := nvidia/cuda:13.0.2-runtime-ubuntu24.04 +VLLM_VERSION ?= 0.17.0 DOCKER_IMAGE := docker/model-runner:latest DOCKER_IMAGE_VLLM := docker/model-runner:latest-vllm-cuda DOCKER_IMAGE_SGLANG := docker/model-runner:latest-sglang @@ -19,6 +20,7 @@ DOCKER_BUILD_ARGS := \ --build-arg LLAMA_SERVER_VERSION=$(LLAMA_SERVER_VERSION) \ --build-arg LLAMA_SERVER_VARIANT=$(LLAMA_SERVER_VARIANT) \ --build-arg BASE_IMAGE=$(BASE_IMAGE) \ + --build-arg VLLM_VERSION='$(VLLM_VERSION)' \ --target $(DOCKER_TARGET) \ -t $(DOCKER_IMAGE) @@ -232,13 +234,13 @@ vllm-metal-dev: rm -rf "$(VLLM_METAL_INSTALL_DIR)"; \ $$PYTHON_BIN -m venv "$(VLLM_METAL_INSTALL_DIR)"; \ . 
"$(VLLM_METAL_INSTALL_DIR)/bin/activate" && \ - VLLM_VERSION="0.13.0" && \ + VLLM_UPSTREAM_VERSION="0.13.0" && \ WORK_DIR=$$(mktemp -d) && \ - curl -fsSL -o "$$WORK_DIR/vllm.tar.gz" "https://github.com/vllm-project/vllm/releases/download/v$$VLLM_VERSION/vllm-$$VLLM_VERSION.tar.gz" && \ + curl -fsSL -o "$$WORK_DIR/vllm.tar.gz" "https://github.com/vllm-project/vllm/releases/download/v$$VLLM_UPSTREAM_VERSION/vllm-$$VLLM_UPSTREAM_VERSION.tar.gz" && \ tar -xzf "$$WORK_DIR/vllm.tar.gz" -C "$$WORK_DIR" && \ - pip install -r "$$WORK_DIR/vllm-$$VLLM_VERSION/requirements/cpu.txt" && \ + pip install -r "$$WORK_DIR/vllm-$$VLLM_UPSTREAM_VERSION/requirements/cpu.txt" && \ pip install -e "$(VLLM_METAL_PATH)" && \ - pip install -r "$$WORK_DIR/vllm-$$VLLM_VERSION/requirements/common.txt" && \ + pip install -r "$$WORK_DIR/vllm-$$VLLM_UPSTREAM_VERSION/requirements/common.txt" && \ rm -rf "$$WORK_DIR" && \ echo "dev" > "$(VLLM_METAL_INSTALL_DIR)/.vllm-metal-version"; \ echo "vllm-metal dev installed from $(VLLM_METAL_PATH)"