AI-Hypercomputer · xibinliu · Jan 9, 2026 · SurbhiJainUSC · Mar 24, 2026 · SurbhiJainUSC
@@ -15,7 +15,7 @@
 # This workflow builds and pushes MaxText images for both TPU and GPU devices.
 # It runs automatically daily at 12am UTC, on Pull Requests, or manually via Workflow Dispatch.
 
-name: Build Images
+name: Build and Test Images
 
 on:
   schedule:
@@ -32,6 +32,11 @@ on:
           - all
           - tpu
           - gpu
+      for_dev_test:
+        description: 'For development testing. Appends a -test suffix to every image name.'
+        required: false
+        type: boolean
+        default: false
 
 permissions:
   contents: read
@@ -42,6 +47,7 @@ jobs:
     outputs:
       maxtext_sha: ${{ steps.vars.outputs.maxtext_sha }}
       image_date: ${{ steps.vars.outputs.image_date }}
+      image_suffix: ${{ steps.vars.outputs.image_suffix }}
     steps:
      - name: Checkout MaxText
        uses: actions/checkout@v5
@@ -55,6 +61,13 @@ jobs:
         # Image date
         echo "image_date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT
 
+        # Dev-test runs (for_dev_test == true) get a "-test" image suffix; otherwise it is empty.
+        if [[ "${{ github.event.inputs.for_dev_test }}" == "true" ]]; then
+          echo "image_suffix=-test" >> "$GITHUB_OUTPUT"
+        else
+          echo "image_suffix=" >> "$GITHUB_OUTPUT"
+        fi
+
   tpu-pre-training:
     name: ${{ matrix.image_name }}
     needs: setup
@@ -72,25 +85,27 @@ jobs:
             dockerfile: ./src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
     uses: ./.github/workflows/build_and_push_docker_image.yml
     with:
-      image_name: ${{ matrix.image_name }}
+      image_name: ${{ matrix.image_name }}${{ needs.setup.outputs.image_suffix }}  # suffix comes from the setup job
       device: ${{ matrix.device }}
       build_mode: ${{ matrix.build_mode }}
       dockerfile: ${{ matrix.dockerfile }}
       maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
       image_date: ${{ needs.setup.outputs.image_date }}
+      test_mode: tpu-pre-training
 
   tpu-post-training-nightly:
     name: tpu-post-training-nightly
     needs: [setup]
     uses: ./.github/workflows/build_and_push_docker_image.yml
     with:
-      image_name: maxtext_post_training_nightly
+      image_name: maxtext_post_training_nightly${{ needs.setup.outputs.image_suffix }}  # suffix comes from the setup job
       device: tpu
       build_mode: nightly
       workflow: post-training
       dockerfile: ./src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
       maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
       image_date: ${{ needs.setup.outputs.image_date }}
+      test_mode: tpu-post-training
 
   gpu-pre-training:
     name: ${{ matrix.image_name }}
@@ -109,9 +124,10 @@ jobs:
             dockerfile: ./src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile
     uses: ./.github/workflows/build_and_push_docker_image.yml
     with:
-      image_name: ${{ matrix.image_name }}
+      image_name: ${{ matrix.image_name }}${{ needs.setup.outputs.image_suffix }}  # suffix comes from the setup job
       device: ${{ matrix.device }}
       build_mode: ${{ matrix.build_mode }}
       dockerfile: ${{ matrix.dockerfile }}
       maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
       image_date: ${{ needs.setup.outputs.image_date }}
+      test_mode: gpu-pre-training
@@ -45,6 +45,10 @@ on:
         required: false
         type: string
         default: ''
+      test_mode:
+        description: "Test mode (tpu-pre-training, tpu-post-training, gpu-pre-training)"
+        required: true
+        type: string
 
 permissions:
   contents: read
@@ -61,6 +65,8 @@ jobs:
         github.event.inputs.target_device == 'tpu' ||
         github.event.inputs.target_device == 'gpu'
       )
+    outputs:
+      should_run: ${{ steps.check.outputs.should_run }} # Map the step output to the job level
     steps:
       - name: Check if build should run
         id: check
@@ -87,7 +93,9 @@ jobs:
           ref: ${{ inputs.maxtext_sha }}
 
       - name: Checkout post-training dependencies
-        if: steps.check.outputs.should_run == 'true' && inputs.image_name == 'maxtext_post_training_nightly'
+        if: |
+          steps.check.outputs.should_run == 'true' &&
+          contains(inputs.image_name, 'post_training_nightly')
         run: |
           git clone https://github.com/google/tunix.git ./tunix
           git clone https://github.com/vllm-project/vllm.git ./vllm
@@ -127,38 +135,103 @@ jobs:
             LIBTPU_VERSION=NONE
             INCLUDE_TEST_ASSETS=true
 
+  # Runs the test flavors selected by inputs.test_mode against the run-id-tagged image.
+  test:
+    needs: build_and_push
+    if: |
+      needs.build_and_push.result == 'success' &&
+      needs.build_and_push.outputs.should_run == 'true'
+    strategy:
+      fail-fast: false
+      matrix:
+        flavor: >-
+            ${{ fromJSON('{
+              "gpu-pre-training": ["gpu-unit", "gpu-integration"],
+              "tpu-post-training": ["post-training-tpu-unit", "post-training-tpu-integration", "post-training-cpu-unit"],
+              "tpu-pre-training": ["tpu-unit", "tpu-integration", "cpu-unit"]
+            }')[inputs.test_mode] }}
+    uses: ./.github/workflows/run_tests_coordinator.yml
+    with:
+      flavor: ${{ matrix.flavor }}
+      base_image: ${{ inputs.image_name }}:${{ github.run_id }}
+      is_scheduled_run: true
+      maxtext_installed: true
+
+  # Notebook tests run only when inputs.test_mode is tpu-post-training.
+  notebook-test:
+    needs: build_and_push
+    if: |
+      inputs.test_mode == 'tpu-post-training' &&
+      needs.build_and_push.result == 'success' &&
+      needs.build_and_push.outputs.should_run == 'true'
+    uses: ./.github/workflows/run_jupyter_notebooks.yml
+    with:
+      device_type: tpu
+      device_name: v6e-4
+      base_image: ${{ inputs.image_name }}:${{ github.run_id }}
+      cloud_runner: linux-x86-ct6e-180-4tpu
+      maxtext_installed: true
+    secrets:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
+
+  tagging:
+    # Adds the release/nightly tags to the run-id-tagged image once the test job
+    # succeeded and notebook-test either succeeded or was skipped.
+    needs: [test, notebook-test]
+    if: |
+      always() &&
+      needs.test.result == 'success' &&
+      (needs.notebook-test.result == 'success' || needs.notebook-test.result == 'skipped')
+    runs-on: linux-x86-n2-16-buildkit
+    container: google/cloud-sdk:524.0.0
+    steps:
+      # Jobs do not share a workspace: the tagging script below runs
+      # `git rev-parse --short HEAD`, so this job needs its own checkout.
+      - name: Checkout MaxText
+        uses: actions/checkout@v5
+        with:
+          ref: ${{ inputs.maxtext_sha }}
+
+      # NOTE(review): the post-training dependency clones (./tunix, ./vllm, ...)
+      # exist only in the build_and_push workspace, so their tags are skipped here.
+      # TODO: export those hashes as build_and_push job outputs.
+      - name: Configure Docker
+        run: gcloud auth configure-docker us-docker.pkg.dev,gcr.io -q
+
       - name: Add tags to Docker image
-        if: steps.check.outputs.should_run == 'true'
         shell: bash
         run: |
           SOURCE_IMAGE="gcr.io/tpu-prod-env-multipod/${INPUTS_IMAGE_NAME}"
+          TEMP_IMG="${SOURCE_IMAGE}:${{ github.run_id }}"
 
           if [[ $INPUTS_VERSION_NAME ]]; then
             echo "Tagging docker images corresponding to PyPI release..."
-            gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${INPUTS_VERSION_NAME}" --quiet
+            gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:${INPUTS_VERSION_NAME}" --quiet
           else
             echo "Tagging docker images corresponding to nightly release..."
 
             # Add date tag
-            gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${INPUTS_IMAGE_DATE}" --quiet
+            gcloud container images add-tag "${TEMP_IMG}" "$SOURCE_IMAGE:${INPUTS_IMAGE_DATE}" --quiet
 
             # Convert date to YYYYMMDD format
             clean_date=$(echo "${INPUTS_IMAGE_DATE}" | sed 's/[-:]//g' | cut -c1-8)
 
             # Add MaxText tag
             maxtext_hash=$(git rev-parse --short HEAD)
-            gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:maxtext_${maxtext_hash}_${clean_date}" --quiet
-
-          # Add post-training dependencies tags
-          if [ "${{ inputs.workflow }}" == "post-training" ]; then
-            for dir in tunix vllm tpu-inference; do
-              if [ -d "./$dir" ]; then
-                dir_hash=$(git -C "$dir" rev-parse --short HEAD)
-                gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${dir}_${dir_hash}_${clean_date}" --quiet
+            gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:maxtext_${maxtext_hash}_${clean_date}" --quiet
+
+            # Add post-training dependencies tags
+            if [ "${{ inputs.workflow }}" == "post-training" ]; then
+              for dir in tunix vllm tpu-inference; do
+                if [ -d "./$dir" ]; then
+                  dir_hash=$(git -C "$dir" rev-parse --short HEAD)
+                  gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:${dir}_${dir_hash}_${clean_date}" --quiet
                 fi
               done
             fi
           fi
+          # Latest Tag
+          gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:latest" --quiet
         env:
           INPUTS_IMAGE_NAME: ${{ inputs.image_name }}
           INPUTS_IMAGE_DATE: ${{ inputs.image_date }}

@@ -123,3 +123,4 @@ jobs:
       dockerfile: ${{ matrix.dockerfile }}
       maxtext_sha: ${{ github.sha }}
       version_name: ${{ needs.get_latest_maxtext_pypi_version.outputs.latest_pypi_version }}
+      test_mode: ${{ matrix.device }}-${{ matrix.workflow }}
@@ -128,16 +128,21 @@ jobs:
           if [ "${INPUTS_MAXTEXT_INSTALLED}" == "true" ]; then
             # Move to the directory where code is baked into the image. See the Dockerfile.
             cd /deps
+            REPO_ROOT="/deps"
+
             PYTHON_EXE="python3"
             # Disable coverage flags when testing against a pre-installed package
             PYTEST_COV_ARGS=""
           else
+            REPO_ROOT="${{ github.workspace }}"
+
             # Use the local virtual environment created in Step 3
             PYTHON_EXE=".venv/bin/python3"
             # Ensure pytest-cov is available and enable coverage flags
             $PYTHON_EXE -m pip install --quiet pytest-cov
-            PYTEST_COV_ARGS="--cov=src/MaxText --cov=maxtext --cov-report=xml --cov-report=term"
+            PYTEST_COV_ARGS="--cov=src/MaxText --cov=src/maxtext --cov-report=xml --cov-report=term"
           fi
+          export PYTHONPATH="${REPO_ROOT}/src${PYTHONPATH:+:${PYTHONPATH}}"
 
           if [ "${INPUTS_IS_SCHEDULED_RUN}" == "true" ]; then
             FINAL_PYTEST_MARKER="${INPUTS_PYTEST_MARKER}"
@@ -165,16 +170,16 @@ jobs:
             --durations=0 \
             $PYTEST_COV_ARGS \
             $SPLIT_ARGS \
-            ${INPUTS_PYTEST_EXTRA_ARGS}
+            ${INPUTS_PYTEST_EXTRA_ARGS} \
+            ./tests ./src
 
         env:
-          PYTHONPATH: "${{ github.workspace }}/src"
           INPUTS_IS_SCHEDULED_RUN: ${{ inputs.is_scheduled_run }}
           INPUTS_PYTEST_MARKER: ${{ inputs.pytest_marker }}
           INPUTS_DEVICE_TYPE: ${{ inputs.device_type }}
           INPUTS_PYTEST_ADDOPTS: ${{ inputs.pytest_addopts }}
           INPUTS_TOTAL_WORKERS: ${{ inputs.total_workers }}
-          INPUTS_WORKER_GROUP: ${{ inputs.total_workers }}
+          INPUTS_WORKER_GROUP: ${{ inputs.worker_group }}
           INPUTS_PYTEST_EXTRA_ARGS: ${{ inputs.pytest_extra_args }}
           INPUTS_MAXTEXT_INSTALLED: ${{ inputs.maxtext_installed }}
       - name: Upload results to Codecov