@@ -367,3 +367,68 @@ jobs:
367367 "description": "'$status'",
368368 "context": "github-actions/build"
369369 }'
370+
# Job (an entry of the workflow's `jobs:` mapping): installs the package from
# checkpoint/ and launches the emergency checkpoint manager benchmark via
# launch_xpk.py, writing results to a per-run GCS directory.
run-xpk:
  runs-on: ubuntu-latest
  defaults:
    run:
      # Every `run` step below executes from the checkpoint/ subdirectory,
      # so the relative paths in the scripts resolve against it.
      working-directory: checkpoint
  permissions:
    id-token: write  # Required for Workload Identity Federation
    contents: read  # To checkout the repo

  # NOTE(review): `id-token: write` is requested, but no step in this job
  # authenticates to Google Cloud. The launch step targets a GCP project and a
  # gs:// output path, so it presumably needs credentials - confirm whether an
  # auth step (e.g. google-github-actions/auth) must be added before it.
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        # Quoted so YAML keeps it a string (unquoted 3.10 would parse as 3.1);
        # no padding inside the quotes.
        python-version: '3.10'

    # gcloud should be pre-installed on ubuntu-latest runners, but ensure it's
    # available. Otherwise, you might need a step to install it:
    #   - uses: google-github-actions/setup-gcloud@v2
    - name: Install package dependencies and verify gcloud
      run: |
        pip install --no-cache-dir -e .
        # The extras spec is quoted so the shell never treats [testing] as a glob.
        pip install --no-cache-dir -e '.[testing]' -f https://storage.googleapis.com/jax-releases/libtpu_releases.html
        # NOTE(review): presumably removes a PyPI `orbax` pulled in as a
        # dependency so it cannot shadow the editable install - confirm.
        pip uninstall -y orbax
        pip install -U jax jaxlib
        # Fails the step early if the gcloud CLI is missing from the runner.
        gcloud --version

    - name: Install xpk
      run: |
        pip install xpk

    # The placeholders below are kept as YAML comments (outside the script) so
    # the `${{ secrets.* }}` expressions are not expanded into the script text.
    #
    # Export necessary environment variables for xpk and gcloud
    #   echo "GKE_CLUSTER_NAME=${{ secrets.GKE_CLUSTER_NAME }}" >> $GITHUB_ENV
    #   echo "GCP_PROJECT_ID=${{ secrets.GCP_PROJECT_ID }}" >> $GITHUB_ENV
    #   echo "GCP_ZONE=${{ secrets.GCP_ZONE }}" >> $GITHUB_ENV
    #   echo "YOUR_WORKLOAD_IMAGE=${{ secrets.YOUR_WORKLOAD_IMAGE }}" >> $GITHUB_ENV
    #
    # Example xpk command to create a workload
    # Add more xpk commands as needed, potentially in a separate script
    #   python3 xpk_repo/xpk.py workload list --cluster=$GKE_CLUSTER_NAME --zone=$GCP_ZONE --project=$GCP_PROJECT_ID
    - name: Run xpk Commands
      run: |
        # Output goes to a directory keyed by this workflow run's id.
        python3 orbax/checkpoint/_src/testing/benchmarks/xpk/launch_xpk.py \
          --cluster_name=test-github \
          --project=orbax-checkpoint \
          --zone=us-central1-a \
          --tpu_type=v5litepod-16 \
          --num_slices=1 \
          --docker_image=gcr.io/orbax-checkpoint/orbax-benchmarks:stable-v35 \
          --config_file orbax/checkpoint/_src/testing/benchmarks/configs/emergency_checkpoint_manager_benchmark.yaml \
          --output_directory gs://orbax-benchmarks/xpk/${{ github.run_id }}

    # Runs only when an earlier step in this job has failed.
    - name: Capture Logs (Optional)
      if: failure()
      run: |
        # Example: Capture logs if the xpk command fails
        # This depends on how xpk logs and if you can access them.
        echo "XPK command failed. Checking for logs..."
        # Add commands to retrieve relevant logs from GKE or xpk
0 commit comments