-
Notifications
You must be signed in to change notification settings - Fork 493
Trigger unit tests for docker images upload workflow #3329
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: xibin/ci
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,7 +15,7 @@ | |
| # This workflow builds and pushes MaxText images for both TPU and GPU devices. | ||
| # It runs automatically daily at 12am UTC, on Pull Requests, or manually via Workflow Dispatch. | ||
|
|
||
| name: Build Images | ||
| name: Build and Test Images | ||
|
|
||
| on: | ||
| schedule: | ||
|
|
@@ -32,6 +32,11 @@ on: | |
| - all | ||
| - tpu | ||
| - gpu | ||
| for_dev_test: | ||
xibinliu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| description: 'For development test purpose. All images will be added a -test suffix' | ||
| required: false | ||
| type: boolean | ||
| default: false | ||
|
|
||
| permissions: | ||
| contents: read | ||
|
|
@@ -42,6 +47,7 @@ jobs: | |
| outputs: | ||
| maxtext_sha: ${{ steps.vars.outputs.maxtext_sha }} | ||
| image_date: ${{ steps.vars.outputs.image_date }} | ||
| image_suffix: ${{ steps.vars.outputs.image_suffix }} | ||
| steps: | ||
| - name: Checkout MaxText | ||
| uses: actions/checkout@v5 | ||
|
|
@@ -55,6 +61,13 @@ jobs: | |
| # Image date | ||
| echo "image_date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT | ||
|
|
||
| # If for_dev_test is true, set suffix to -test, otherwise empty | ||
| if [[ "${{ github.event.inputs.for_dev_test }}" == "true" ]]; then | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can skip this |
||
| echo "image_suffix=-test" >> $GITHUB_OUTPUT | ||
| else | ||
| echo "image_suffix=" >> $GITHUB_OUTPUT | ||
| fi | ||
|
|
||
| tpu-pre-training: | ||
| name: ${{ matrix.image_name }} | ||
| needs: setup | ||
|
|
@@ -72,25 +85,27 @@ jobs: | |
| dockerfile: ./src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile | ||
| uses: ./.github/workflows/build_and_push_docker_image.yml | ||
| with: | ||
| image_name: ${{ matrix.image_name }} | ||
| image_name: ${{ matrix.image_name }}${{ needs.setup.outputs.image_suffix }} | ||
| device: ${{ matrix.device }} | ||
| build_mode: ${{ matrix.build_mode }} | ||
| dockerfile: ${{ matrix.dockerfile }} | ||
| maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }} | ||
| image_date: ${{ needs.setup.outputs.image_date }} | ||
| test_mode: tpu-pre-training | ||
|
|
||
| tpu-post-training-nightly: | ||
| name: tpu-post-training-nightly | ||
| needs: [setup] | ||
| uses: ./.github/workflows/build_and_push_docker_image.yml | ||
| with: | ||
| image_name: maxtext_post_training_nightly | ||
| image_name: maxtext_post_training_nightly${{ needs.setup.outputs.image_suffix }} | ||
| device: tpu | ||
| build_mode: nightly | ||
| workflow: post-training | ||
| dockerfile: ./src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile | ||
| maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }} | ||
| image_date: ${{ needs.setup.outputs.image_date }} | ||
| test_mode: tpu-post-training | ||
|
|
||
| gpu-pre-training: | ||
| name: ${{ matrix.image_name }} | ||
|
|
@@ -109,9 +124,10 @@ jobs: | |
| dockerfile: ./src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile | ||
| uses: ./.github/workflows/build_and_push_docker_image.yml | ||
| with: | ||
| image_name: ${{ matrix.image_name }} | ||
| image_name: ${{ matrix.image_name }}${{ needs.setup.outputs.image_suffix }} | ||
| device: ${{ matrix.device }} | ||
| build_mode: ${{ matrix.build_mode }} | ||
| dockerfile: ${{ matrix.dockerfile }} | ||
| maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }} | ||
| image_date: ${{ needs.setup.outputs.image_date }} | ||
| test_mode: gpu-pre-training | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,6 +45,10 @@ on: | |
| required: false | ||
| type: string | ||
| default: '' | ||
| test_mode: | ||
| description: "Test mode (tpu-pre-training, tpu-post-training, gpu-pre-training)" | ||
| required: true | ||
| type: string | ||
|
|
||
| permissions: | ||
| contents: read | ||
|
|
@@ -61,6 +65,8 @@ jobs: | |
| github.event.inputs.target_device == 'tpu' || | ||
| github.event.inputs.target_device == 'gpu' | ||
| ) | ||
| outputs: | ||
| should_run: ${{ steps.check.outputs.should_run }} # Map the step output to the job level | ||
| steps: | ||
| - name: Check if build should run | ||
| id: check | ||
|
|
@@ -87,7 +93,9 @@ jobs: | |
| ref: ${{ inputs.maxtext_sha }} | ||
|
|
||
| - name: Checkout post-training dependencies | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please rebase, this is now removed. |
||
| if: steps.check.outputs.should_run == 'true' && inputs.image_name == 'maxtext_post_training_nightly' | ||
| if: | | ||
| steps.check.outputs.should_run == 'true' && | ||
| contains(inputs.image_name, 'post_training_nightly') | ||
| run: | | ||
| git clone https://github.com/google/tunix.git ./tunix | ||
| git clone https://github.com/vllm-project/vllm.git ./vllm | ||
|
|
@@ -127,38 +135,89 @@ jobs: | |
| LIBTPU_VERSION=NONE | ||
| INCLUDE_TEST_ASSETS=true | ||
|
|
||
| test: | ||
| needs: build_and_push | ||
| if: | | ||
| needs.build_and_push.result == 'success' && | ||
| needs.build_and_push.outputs.should_run == 'true' | ||
| strategy: | ||
| fail-fast: false | ||
| matrix: | ||
| flavor: >- | ||
| ${{ fromJSON('{ | ||
| "gpu-pre-training": ["gpu-unit", "gpu-integration"], | ||
| "tpu-post-training": ["post-training-tpu-unit", "post-training-tpu-integration", "post-training-cpu-unit"], | ||
| "tpu-pre-training": ["tpu-unit", "tpu-integration", "cpu-unit"] | ||
| }')[inputs.test_mode] }} | ||
| uses: ./.github/workflows/run_tests_coordinator.yml | ||
| with: | ||
| flavor: ${{ matrix.flavor }} | ||
| base_image: ${{ inputs.image_name }}:${{ github.run_id }} | ||
| is_scheduled_run: true | ||
| maxtext_installed: true | ||
|
|
||
| notebook-test: | ||
| needs: build_and_push | ||
| if: | | ||
| inputs.test_mode == 'tpu-post-training' && | ||
| needs.build_and_push.result == 'success' && | ||
| needs.build_and_push.outputs.should_run == 'true' | ||
| uses: ./.github/workflows/run_jupyter_notebooks.yml | ||
| with: | ||
| device_type: tpu | ||
| device_name: v6e-4 | ||
| base_image: ${{ inputs.image_name }}:${{ github.run_id }} | ||
| cloud_runner: linux-x86-ct6e-180-4tpu | ||
| maxtext_installed: true | ||
| secrets: | ||
| HF_TOKEN: ${{ secrets.HF_TOKEN }} | ||
|
|
||
| tagging: | ||
| needs: [test, notebook-test] | ||
| if: | | ||
| always() && | ||
| needs.test.result == 'success' && | ||
| (needs.notebook-test.result == 'success' || needs.notebook-test.result == 'skipped') | ||
| runs-on: linux-x86-n2-16-buildkit | ||
| container: google/cloud-sdk:524.0.0 | ||
| steps: | ||
| - name: Configure Docker | ||
| run: gcloud auth configure-docker us-docker.pkg.dev,gcr.io -q | ||
|
|
||
| - name: Add tags to Docker image | ||
| if: steps.check.outputs.should_run == 'true' | ||
| shell: bash | ||
| run: | | ||
| SOURCE_IMAGE="gcr.io/tpu-prod-env-multipod/${INPUTS_IMAGE_NAME}" | ||
| TEMP_IMG="${SOURCE_IMAGE}:${{ github.run_id }}" | ||
|
|
||
| if [[ $INPUTS_VERSION_NAME ]]; then | ||
| echo "Tagging docker images corresponding to PyPI release..." | ||
| gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${INPUTS_VERSION_NAME}" --quiet | ||
| gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:${INPUTS_VERSION_NAME}" --quiet | ||
| else | ||
| echo "Tagging docker images corresponding to nightly release..." | ||
|
|
||
| # Add date tag | ||
| gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${INPUTS_IMAGE_DATE}" --quiet | ||
| gcloud container images add-tag "${TEMP_IMG}" "$SOURCE_IMAGE:${INPUTS_IMAGE_DATE}" --quiet | ||
|
|
||
| # Convert date to YYYYMMDD format | ||
| clean_date=$(echo "${INPUTS_IMAGE_DATE}" | sed 's/[-:]//g' | cut -c1-8) | ||
|
|
||
| # Add MaxText tag | ||
| maxtext_hash=$(git rev-parse --short HEAD) | ||
| gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:maxtext_${maxtext_hash}_${clean_date}" --quiet | ||
|
|
||
| # Add post-training dependencies tags | ||
| if [ "${{ inputs.workflow }}" == "post-training" ]; then | ||
| for dir in tunix vllm tpu-inference; do | ||
| if [ -d "./$dir" ]; then | ||
| dir_hash=$(git -C "$dir" rev-parse --short HEAD) | ||
| gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${dir}_${dir_hash}_${clean_date}" --quiet | ||
| gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:maxtext_${maxtext_hash}_${clean_date}" --quiet | ||
|
|
||
| # Add post-training dependencies tags | ||
| if [ "${{ inputs.workflow }}" == "post-training" ]; then | ||
| for dir in tunix vllm tpu-inference; do | ||
| if [ -d "./$dir" ]; then | ||
| dir_hash=$(git -C "$dir" rev-parse --short HEAD) | ||
| gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:${dir}_${dir_hash}_${clean_date}" --quiet | ||
| fi | ||
| done | ||
| fi | ||
| fi | ||
| # Latest Tag | ||
| gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:latest" --quiet | ||
| env: | ||
| INPUTS_IMAGE_NAME: ${{ inputs.image_name }} | ||
| INPUTS_IMAGE_DATE: ${{ inputs.image_date }} | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't rename this workflow