diff --git a/.github/workflows/currency-build.yaml b/.github/workflows/currency-build.yaml index 0b1cab96d0..2aa1b50045 100644 --- a/.github/workflows/currency-build.yaml +++ b/.github/workflows/currency-build.yaml @@ -45,7 +45,7 @@ run-name: Currency Build ${{ inputs.package_name }} && Unique ID ${{ inputs.uniq jobs: build_info: - runs-on: ubuntu-24.04-ppc64le-p10 + runs-on: ubuntu-24.04-ppc64le env: PACKAGE_NAME: ${{ inputs.package_name }} VERSION: ${{ inputs.version }} @@ -55,6 +55,8 @@ jobs: ENABLE_TRIVY: ${{ inputs.enable_trivy }} ENABLE_SYFT: ${{ inputs.enable_syft }} ENABLE_GRYPE: ${{ inputs.enable_grype }} + COS_BUCKET: ${{secrets.COS_BUCKET}} + COS_ENDPOINT: ${{secrets.COS_ENDPOINT}} steps: - name: Checkout code uses: actions/checkout@v6 @@ -71,6 +73,8 @@ jobs: - name: Get Build Info and Save Variables run: | + echo "==== ${COS_BUCKET: -1} ======" + echo "==== $COS_ENDPOINT =====" chmod +x ./gha-script/read_buildinfo.sh bash ./gha-script/read_buildinfo.sh @@ -108,10 +112,10 @@ jobs: build: needs: build_info if: ${{ inputs.validate_build_script == 'true' }} - runs-on: ubuntu-24.04-ppc64le-p10 + runs-on: ubuntu-24.04-ppc64le steps: - uses: actions/checkout@v6 - + - name: Download package-cache uses: actions/download-artifact@v7 with: @@ -172,7 +176,7 @@ jobs: needs: build_info name: Create Wheel for Python ${{ matrix.python-version }} if: ${{ inputs.wheel_build == 'true' }} - runs-on: ubuntu-24.04-ppc64le-p10 + runs-on: ubuntu-24.04-ppc64le strategy: fail-fast: false matrix: @@ -192,9 +196,16 @@ jobs: PYTHON_VERSION: ${{ matrix.python-version }} PACKAGE_NAME: ${{ inputs.package_name }} VERSION: ${{ inputs.version }} + COS_API_KEY: ${{ secrets.COS_API_KEY }} + COS_BUCKET: ${{secrets.COS_BUCKET}} + COS_ENDPOINT: ${{secrets.COS_ENDPOINT}} + COS_SERVICE_INSTANCE_ID: ${{secrets.COS_SERVICE_INSTANCE_ID}} + steps: - name: Checkout code uses: actions/checkout@v6 + with: + fetch-depth: 0 - name: Install system dependencies run: | @@ -268,7 +279,7 @@ jobs: 
wheel_licenses: needs: wheel_build - runs-on: ubuntu-24.04-ppc64le-p10 + runs-on: ubuntu-24.04-ppc64le steps: - uses: actions/checkout@v6 @@ -318,7 +329,7 @@ jobs: source_scanner: needs: build if: ${{ inputs.validate_build_script == 'true' }} - runs-on: ubuntu-24.04-ppc64le-p10 + runs-on: ubuntu-24.04-ppc64le steps: - uses: actions/checkout@v6 @@ -392,7 +403,7 @@ jobs: build_docker: needs: build_info if: ${{ inputs.build_docker == 'true' }} - runs-on: ubuntu-24.04-ppc64le-p10 + runs-on: ubuntu-24.04-ppc64le steps: - uses: actions/checkout@v6 @@ -436,7 +447,7 @@ jobs: image_scanner: needs: build_docker if: ${{ inputs.build_docker == 'true' }} - runs-on: ubuntu-24.04-ppc64le-p10 + runs-on: ubuntu-24.04-ppc64le steps: - uses: actions/checkout@v6 @@ -506,7 +517,7 @@ jobs: final_summary: name: Final Summary Stage needs: [build, source_scanner] - runs-on: ubuntu-24.04-ppc64le-p10 + runs-on: ubuntu-24.04-ppc64le steps: - name: Checkout code uses: actions/checkout@v6 diff --git a/gha-script/build_wheels.py b/gha-script/build_wheels.py index bb80f2957a..aaf2e6602e 100644 --- a/gha-script/build_wheels.py +++ b/gha-script/build_wheels.py @@ -6,7 +6,7 @@ import docker import json -def trigger_build_wheel(wrapper_file, python_version, image_name, file_name, version): +def trigger_build_wheel(wrapper_file, python_version, image_name, file_name, version, post_process_file): # Docker client setup client = docker.DockerClient(base_url='unix://var/run/docker.sock') @@ -28,7 +28,7 @@ def trigger_build_wheel(wrapper_file, python_version, image_name, file_name, ver command = [ "bash", "-c", - f"cd /home/tester/ && ./{wrapper_file} {python_version} {file_name} {version}" + f"cd /home/tester/ && ./{wrapper_file} {python_version} {file_name} {version} {post_process_file}" ] # Run container @@ -39,7 +39,13 @@ def trigger_build_wheel(wrapper_file, python_version, image_name, file_name, ver detach=True, volumes={current_dir: {'bind': '/home/tester/', 'mode': 'rw'}}, # Mount current directory 
with both files stderr=True, - stdout=True + stdout=True, + environment={ + "COS_API_KEY": os.getenv("COS_API_KEY"), + "COS_SERVICE_INSTANCE_ID": os.getenv("COS_SERVICE_INSTANCE_ID"), + "COS_ENDPOINT": os.getenv("COS_ENDPOINT"), + "COS_BUCKET": os.getenv("COS_BUCKET") + } ) # STREAM logs in real-time @@ -69,4 +75,4 @@ def trigger_build_wheel(wrapper_file, python_version, image_name, file_name, ver if __name__=="__main__": print("Inside python program") - trigger_build_wheel(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5]) + trigger_build_wheel(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6]) diff --git a/gha-script/build_wheels.sh b/gha-script/build_wheels.sh index f49a9cecdb..6f51c6e950 100644 --- a/gha-script/build_wheels.sh +++ b/gha-script/build_wheels.sh @@ -38,6 +38,7 @@ else fi WHEEL_SCRIPT=gha-script/create_wheel_wrapper.sh +POST_PROCESS_SCRIPT_PATH=gha-script/post_process_wheel.py #python3 gha-script/build_wheels.py "$WHEEL_SCRIPT" "$PYTHON_VERSION" "$docker_image" "$PKG_DIR_PATH$BUILD_SCRIPT" "$VERSION" > build_log & # SCRIPT_PID=$! 
@@ -47,7 +48,7 @@ WHEEL_SCRIPT=gha-script/create_wheel_wrapper.sh # sleep 100 # done # wait $SCRIPT_PID -python3 gha-script/build_wheels.py "$WHEEL_SCRIPT" "$PYTHON_VERSION" "$docker_image" "$PKG_DIR_PATH$BUILD_SCRIPT" "$VERSION" 2>&1 | tee wheel_build_log +python3 gha-script/build_wheels.py "$WHEEL_SCRIPT" "$PYTHON_VERSION" "$docker_image" "$PKG_DIR_PATH$BUILD_SCRIPT" "$VERSION" "$POST_PROCESS_SCRIPT_PATH" 2>&1 | tee wheel_build_log wheel_status=${PIPESTATUS[0]} log_size=$(stat -c %s wheel_build_log) diff --git a/gha-script/create_wheel_wrapper.sh b/gha-script/create_wheel_wrapper.sh index 7e4fb8ab75..f51c7f6960 100644 --- a/gha-script/create_wheel_wrapper.sh +++ b/gha-script/create_wheel_wrapper.sh @@ -1,48 +1,49 @@ #!/bin/bash -e +# variables PYTHON_VERSION=$1 BUILD_SCRIPT_PATH=${2:-""} -EXTRA_ARGS="${@:3}" # Capture all additional arguments passed to the script -CURRENT_DIR="${PWD}" -EXIT_CODE=0 +EXTRA_ARGS=${3:-""} +POST_PROCESS_SCRIPT_PATH=${4:-"post_process_wheel.py"} +CURRENT_DIR=$(pwd) -#install gcc -yum install -y gcc-toolset-13 zip unzip + +# install gcc +yum install -y gcc-toolset-13 source /opt/rh/gcc-toolset-13/enable gcc --version -# Temporary build script path + +# temporary build script path if [ -n "$BUILD_SCRIPT_PATH" ]; then TEMP_BUILD_SCRIPT_PATH="temp_build_script.sh" else TEMP_BUILD_SCRIPT_PATH="" fi -# Function to install a specific Python version + +# function to install a specific Python version install_python_version() { local version=$1 - echo "Installing Python version: $version" + echo + echo "==================== Installing Python version: $version ====================" + echo case $version in - "3.9" | "3.11" | "3.12") - echo "Starting python installing..." + "3.9") + yum install -y python3 python3-devel python3-pip + ;; + "3.11" | "3.12") yum install -y python${version} python${version}-devel python${version}-pip ;; "3.10") if ! 
python3.10 --version &>/dev/null; then - echo "Installing dependencies required for python installation..." - yum install -y sudo zlib-devel wget ncurses git - echo "Installing..." - yum install -y make cmake openssl-devel - echo "Installing..." + yum install -y sudo zlib-devel wget ncurses git make cmake openssl-devel xz xz-devel yum install -y libffi libffi-devel sqlite sqlite-devel sqlite-libs bzip2-devel - echo "Starting python installing..." wget https://www.python.org/ftp/python/3.10.15/Python-3.10.15.tgz tar xf Python-3.10.15.tgz cd Python-3.10.15 ./configure --prefix=/usr/local --enable-optimizations - echo "Still building..." make -j2 - echo "Still building..." make altinstall echo "Completed..." cd .. && rm -rf Python-3.10.15.tgz @@ -50,22 +51,14 @@ install_python_version() { ;; "3.13") if ! python3.13 --version &>/dev/null; then - echo "Installing dependencies required for python installation..." - yum install -y sudo zlib-devel wget ncurses git - echo "Installing..." - yum install -y make cmake openssl-devel - echo "Installing..." + yum install -y sudo zlib-devel wget ncurses git make cmake openssl-devel xz xz-devel yum install -y libffi libffi-devel sqlite sqlite-devel sqlite-libs bzip2-devel - echo "Starting python installing..." wget https://www.python.org/ftp/python/3.13.0/Python-3.13.0.tgz tar xzf Python-3.13.0.tgz cd Python-3.13.0 ./configure --prefix=/usr/local --enable-optimizations - echo "Still building..." make -j2 - echo "Still building..." make altinstall - echo "Completed..." cd .. 
&& rm -rf Python-3.13.0.tgz fi ;; @@ -76,15 +69,17 @@ install_python_version() { esac } -# Install the specified Python version + +# install the specified Python version install_python_version "$PYTHON_VERSION" -# Function to copy and format the build script + +# function to copy and format the build script format_build_script() { if [ -n "$BUILD_SCRIPT_PATH" ]; then cp "$BUILD_SCRIPT_PATH" "$TEMP_BUILD_SCRIPT_PATH" - # Modify the build script for compatibility + # modify the build script for compatibility sed -i 's/\bpython[0-9]\+\.[0-9]\+ -m pip /pip /g' "$TEMP_BUILD_SCRIPT_PATH" sed -i 's/python[0-9]\+\.[0-9]\+/python/g' "$TEMP_BUILD_SCRIPT_PATH" sed -i 's/python3 /python /g' "$TEMP_BUILD_SCRIPT_PATH" @@ -102,16 +97,18 @@ format_build_script() { fi } -# Function to create a virtual environment + +# function to create a virtual environment create_venv() { local VENV_DIR=$1 local python_version=$2 - "python$python_version" -m venv --system-site-packages "$VENV_DIR" + "python$python_version" -m venv "$VENV_DIR" source "$VENV_DIR/bin/activate" } -# Function to clean up the virtual environment + +# function to clean up the virtual environment cleanup() { local VENV_DIR=$1 @@ -119,141 +116,248 @@ cleanup() { rm -rf "$VENV_DIR" } -# Function to modify the metadata file after wheel creation -modify_metadata_file() { - local wheel_path="$1" - - # Create a temporary directory for unzipping the wheel file - temp_dir="temp_directory" - mkdir -p "$temp_dir" - # Extract wheel to temp directory - unzip -q "$wheel_path" -d "$temp_dir" +# function to create SHA256 for wheel +generate_sha() { + local build_script=$1 + local python_version=$2 + local cur_dir=$3 + local wheel=$4 + + # Mark repo as safe (to avoid dubious ownership issue) + git config --global --add safe.directory $cur_dir - # Find metadata file - local metadata_file - metadata_file=$(find "$temp_dir" -name METADATA -path "*.dist-info/*") + BUILD_SCRIPT_DATE=$(git log -1 --format=%ci -- "${build_script}") + 
PACKAGE_LANGUAGE=${PACKAGE_LANGUAGE:-python} - # New classifier to add - local new_classifier="Classifier: Environment :: MetaData :: IBM Python Ecosystem" + # Check required variables + : "${PACKAGE_NAME:?PACKAGE_NAME is required}" + : "${PACKAGE_VERSION:?PACKAGE_VERSION is required}" + : "${BUILD_SCRIPT_DATE:?BUILD_SCRIPT_DATE is required}" - # Only proceed if the classifier is not already present - if grep -q "^$new_classifier$" "$metadata_file"; then - echo "Classifier already exists in $wheel_path — no changes made." + if [[ "$wheel" == *any.whl ]]; then + string_to_hash="${PACKAGE_NAME}_${PACKAGE_VERSION}_${PACKAGE_LANGUAGE}_${BUILD_SCRIPT_DATE}" else - awk -v new_classifier="$new_classifier" ' - BEGIN { - found_classifier = 0 - output = "" - } - /^Classifier:/ { - found_classifier = 1 - last_classifier_line = NR - } - { - lines[NR] = $0 - } - END { - if (found_classifier) { - for (i = 1; i <= NR; i++) { - print lines[i] - if (i == last_classifier_line) { - print new_classifier - } - } - } else { - print new_classifier - for (i = 1; i <= NR; i++) { - print lines[i] - } - } - } - ' "$metadata_file" > "$metadata_file.tmp" && mv "$metadata_file.tmp" "$metadata_file" - - # Get the original wheel file name - wheel_file_name=$(basename "$wheel_path") - - # Repack wheel - cd "$temp_dir" && zip -q -r "$CURRENT_DIR/$wheel_file_name" ./* - - echo "Added IBM classifier to $wheel_path" + : "${python_version:?python_version is required}" + string_to_hash="${PACKAGE_NAME}_${PACKAGE_VERSION}_${PACKAGE_LANGUAGE}_${python_version}_${BUILD_SCRIPT_DATE}" fi - # Clean up - rm -rf "$CURRENT_DIR/$temp_dir" + + SHA_VALUE=$(echo -n "$string_to_hash" | sha256sum | awk '{print $1}') + + echo "$SHA_VALUE" > "$cur_dir/sha256.sha" + + echo + echo "===> SHA256 successfully generated for $string_to_hash " + echo "===> SHA256: $SHA_VALUE " + echo } -# Format the build script if it's non-empty + +# format the build script if it's non-empty if [ -n "$BUILD_SCRIPT_PATH" ]; then 
format_build_script fi -echo "Processing Package with Python $PYTHON_VERSION" -# Create and activate virtual environment +# create and activate virtual environment VENV_DIR="$CURRENT_DIR/pyvenv_$PYTHON_VERSION" create_venv "$VENV_DIR" "$PYTHON_VERSION" -echo "=============== Running package build-script starts ==================" +echo +echo "==================== Running package build-script starts ====================" +echo if [ -n "$TEMP_BUILD_SCRIPT_PATH" ]; then - echo "Installing required dependencies..." python$PYTHON_VERSION -m pip install --upgrade pip wheel build pytest nox tox requests setuptools - echo "Installing required dependencies completed..." package_dir=$(grep -oP '(?<=^PACKAGE_DIR=).*' "$TEMP_BUILD_SCRIPT_PATH" | tr -d '"') package_url=$(grep -oP '(?<=^PACKAGE_URL=).*' "$TEMP_BUILD_SCRIPT_PATH" | tr -d '"') package_name=$(basename "$package_url" .git) - echo "Running the build script..." source "$TEMP_BUILD_SCRIPT_PATH" "$EXTRA_ARGS" - -else - echo "No build script to run, skipping execution." 
fi -#checking if wheel is generated through script itself + +# checking if wheel is generated through script itself cd $CURRENT_DIR if ls *.whl 1>/dev/null 2>&1; then - echo "Wheel file already exist in the current directory:" - ls *.whl + echo + echo "===> Wheel file already exists in the current directory: $(ls *.whl)" + echo else - #Navigating to the package directory to build wheel + + # to handle where setup.py or pyproject.toml file is present if [ -d "$package_dir" ]; then - echo "Navigating to the package directory: $package_dir" + echo + echo "===> Navigating to the package directory: $package_dir" + echo cd "$package_dir" else - echo "package_dir not found, Navigating to package_name: $package_name" + echo + echo "===> Package_dir not found, navigating to package_name: $package_name" + echo cd "$package_name" fi - echo "=============== Building wheel ==================" + echo + echo "==================== Building wheel ====================" + echo - # Attempt to build the wheel without isolation + # wheel creation without isolation if ! python -m build --wheel --no-isolation --outdir="$CURRENT_DIR/"; then - echo "============ Wheel Creation Failed for Python $PYTHON_VERSION (without isolation) =================" - echo "Attempting to build with isolation..." + + echo + echo "===> Wheel Creation Failed for Python $PYTHON_VERSION (without isolation)" + echo - # Attempt to build the wheel without isolation + # wheel creation with isolation if ! 
python -m build --wheel --outdir="$CURRENT_DIR/"; then - echo "============ Wheel Creation Failed for Python $PYTHON_VERSION =================" - EXIT_CODE=1 + echo + echo "===> Wheel Creation Failed for Python $PYTHON_VERSION" + echo + exit 1 fi fi fi -cd $CURRENT_DIR -if ls *.whl 1>/dev/null 2>&1; then - echo "=============== Modifying Metadata file ==================" - #add modifying metadata file - wheel_file=$(ls *.whl 1>/dev/null 2>&1 && echo *.whl) - modify_metadata_file "$wheel_file" + +cd "$CURRENT_DIR" +shopt -s nullglob +wheels=("$CURRENT_DIR"/*.whl) +wheel_count=${#wheels[@]} +wheel_file="${wheels[0]}" + + +# check the wheel count in the current dir +if [ "$wheel_count" -ne 1 ]; then + echo + echo "===> ERROR: Expected exactly 1 wheel but found $wheel_count" + echo + exit 1 fi + +echo +echo "==== Running auditwheel repair on: ${wheel_file} ====" +echo + + +# install required tools +pip install auditwheel "patchelf>=0.14" + + +# location of repaired wheel +WHEELHOUSE="$CURRENT_DIR/wheelhouse" +mkdir -p "$WHEELHOUSE" + + +# run auditwheel +set +e +audit_output=$(auditwheel repair "$wheel_file" --wheel-dir "$WHEELHOUSE" --exclude libtensorflow_framework.so.2 --exclude libpython3.11.so.1.0 --exclude libpython3.10.so.1.0 --exclude libpython3.12.so.1.0 --exclude libpython3.13.so.1.0 --exclude libc10.so --exclude libtorch.so --exclude libtorch_cpu.so --exclude libtorch_python.so --exclude libshm.so --exclude libtorchaudio.so --exclude libtorchtext.so --exclude libavutil-ffmpeg.so.54 --exclude libavformat-ffmpeg.so.56 --exclude libswscale-ffmpeg.so.3 --exclude libavcodec-ffmpeg.so.56 --exclude libavformat.so.57 --exclude libswscale.so.4 --exclude libavutil.so.55 --exclude libswscale.so.5 --exclude libavformat.so.58 2>&1) +audit_status=$? 
+set -e + +echo +echo "===> Result of running auditwheel on the wheel:" +echo +echo "$audit_output" +echo + + +# error case +if echo "$audit_output" | grep -q "ValueError: Cannot repair wheel"; then + echo + echo "===>ERROR: Auditwheel failed to repair wheel: ${wheel_file}" + echo + exit 1 + +# skipped case (no-arch wheels) +elif echo "$audit_output" | grep -q "This does not look like a platform wheel"; then + + echo + echo "===> Auditwheel skipped for: ${wheel_file}" + echo + + if [[ "$wheel_file" == *any.whl ]]; then + echo + echo "===> Pure Python wheel detected. (No-arch wheel)" + echo + else + echo + echo "===> ERROR: Skipped wheel is not universal i.e(*any.whl)." + echo + exit 1 + fi + +# success case +elif [ "$audit_status" -eq 0 ]; then + echo + echo "===> Auditwheel succeeded for $wheel_file" + echo + + rm -f "$CURRENT_DIR"/*.whl + cp "$WHEELHOUSE"/*.whl "$CURRENT_DIR" + + echo + echo "===> Repaired wheel $(basename "$WHEELHOUSE"/*.whl) copied at $CURRENT_DIR" + echo + +# any other case +else + echo + echo "ERROR: Auditwheel failed." + echo + exit 1 +fi + + +cd "$CURRENT_DIR" +wheel_final=(*.whl) + + +echo +echo "============== Generating sha for: ${wheel_final} ==============" +echo + + + +# generate sha256 +generate_sha "$BUILD_SCRIPT_PATH" "$PYTHON_VERSION" "$CURRENT_DIR" "$wheel_final" + + +# install required dependencies for post_process_wheel.py +pip install ibm-cos-sdk +SHA256_VALUE=$(cat sha256.sha) + + +echo +echo "= Post Processing wheel ${wheel_final} with SHA: ${SHA256_VALUE} =" +echo + + +# post processing of wheels (Suffix addition, license addition, metadata addition) +if python ${POST_PROCESS_SCRIPT_PATH} ${wheel_final} ${SHA256_VALUE}; then + echo + echo "===> SUCCESS: Wheels post process successfully." + echo +else + echo + echo "===> ERROR: Failed to post process wheels." 
+ echo + exit 1 +fi + + +echo +echo "============ Final wheel: $(ls -t *.whl 2>/dev/null | head -1) ===========" +echo + # Clean up virtual environment cleanup "$VENV_DIR" + # Remove temporary build script [ -n "$TEMP_BUILD_SCRIPT_PATH" ] && rm "$CURRENT_DIR/$TEMP_BUILD_SCRIPT_PATH" -exit $EXIT_CODE +exit 0 diff --git a/gha-script/post_process_wheel.py b/gha-script/post_process_wheel.py new file mode 100644 index 0000000000..b82049d5e6 --- /dev/null +++ b/gha-script/post_process_wheel.py @@ -0,0 +1,604 @@ +""" +post_process_wheel.py +This script performs post-processing of a built Python wheel before uploading it to IBM COS. + +Main responsibilities: +1. Unpack the wheel. +2. Detect bundled shared libraries (.so files) and extract their license information. +3. Inject license details into: + - UBI_BUNDLED_LICENSES.txt + - BUNDLED_LICENSES.txt +4. Update the wheel METADATA by injecting the classifier: + "Classifier: Environment :: MetaData :: IBM Python Ecosystem" +5. Determine the correct version suffix (+ppc64leN) by checking IBM COS: + - Compute SHA256 of the local wheel. + - Check if a wheel with the same name already exists in COS. + - If SHA matches → reuse suffix. + - If SHA differs → increment suffix (ppc64le1, ppc64le2, etc.). +6. Update the wheel version with the resolved suffix. +7. Regenerate the RECORD file with updated hashes. +8. Repack the wheel. + +Execution Flow: +Wheel Build → Auditwheel Repair → post_process_wheel.py → Upload to COS +This script is executed by the CI pipeline through create_wheel_wrapper.sh. 
+""" + +import os +import re +import shutil +import subprocess +import tempfile +import sys +import hashlib +import base64 +import ibm_boto3 +from ibm_botocore.client import Config +import logging + +logging.basicConfig( + level=logging.INFO, + format="[%(levelname)s] %(asctime)s - %(message)s" +) + +logger = logging.getLogger(__name__) + +# COS configuration +COS_API_KEY = os.environ["COS_API_KEY"] +COS_SERVICE_INSTANCE_ID = os.environ["COS_SERVICE_INSTANCE_ID"] +COS_ENDPOINT = os.environ["COS_ENDPOINT"] +COS_BUCKET = os.environ["COS_BUCKET"] + +# License extraction utilities +LICENSE_PATTERN = re.compile(r"^(LICENSE|COPYING)(\..*)?$") +LICENSE_SEPARATOR = "----" # Hardcoded separator for both files +# Metadata update utilities +CLASSIFIER = "Classifier: Environment :: MetaData :: IBM Python Ecosystem" +# Suffix configuration +BASE_SUFFIX = "ppc64le" + +def run_command(cmd): + # Run a command and return the result, with error handling + try: + return subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + text=True, + check=False + ) + except Exception as e: + logger.error(f"Command failed → {cmd} : {e}") + return None + +def find_libs_dirs(root): + # Walk the directory tree to find all .libs directories + try: + return [ + os.path.join(dirpath, d) + for dirpath, dirnames, _ in os.walk(root) + for d in dirnames + if d.endswith(".libs") + ] + except Exception as e: + logger.error(f"Failed to find .libs directories → {e}") + return [] + +def collect_so_files(libs_dir): + # Collect .so files in a given .libs directory + try: + return [ + os.path.join(libs_dir, f) + for f in os.listdir(libs_dir) + if f.startswith("lib") and ".so" in f + ] + except Exception as e: + logger.error(f"Failed to collect .so files → {e}") + return [] + +def normalize_so_name(so_name): + # Normalize .so name by removing hash-like suffixes before .so or version numbers + return re.sub(r'-[0-9a-f]{8,}(?=(?:\.so|\.\d))', '', so_name) + +def find_all_so_anywhere(so_name): 
+ # Search for the .so file anywhere in the filesystem + try: + result = run_command(["find", ".", "-type", "f", "-name", so_name]) + if len(result.stdout) == 0: + result = run_command(["find", "/", "-type", "f", "-name", so_name]) + return result.stdout.strip().splitlines() + except Exception as e: + logger.error(f"Failed to find .so files → {e}") + return [] + +def get_rpm_package(so_path): + # Get the RPM package that owns the given .so file + try: + logger.info(f"Searching for library → {so_path}") + result = run_command(["rpm", "-qf", so_path]) + if result.returncode == 0: + return result.stdout.strip() + return None + except Exception as e: + logger.error(f"RPM package lookup failed for {so_path} → {e}") + return None + +def get_rpm_license(pkg_name): + # Get the license of an RPM package + try: + result = run_command(["rpm", "-q", "--qf", "%{LICENSE}\n", pkg_name]) + if result.returncode == 0: + return result.stdout.strip() + return None + except Exception as e: + logger.error(f"RPM license lookup failed for {pkg_name} → {e}") + return None + +def find_project_root(so_path, max_up=10): + # Traverse up the directory tree to find a directory containing LICENSE file + try: + current = os.path.dirname(so_path) + for _ in range(max_up): + if not current: + break # stop if directory doesn't exist + + for f in os.listdir(current): + if LICENSE_PATTERN.match(f): + return current # found LICENSE/COPYING + + parent = os.path.dirname(current) + if parent == current: + break # reached root + current = parent + + return None # no license found + except Exception as e: + logger.error(f"Failed to find project root for {so_path} → {e}") + return None + + +def find_license_in_directory(directory): + # Look for LICENSE files in the given directory + try: + for f in os.listdir(directory): + if LICENSE_PATTERN.match(f): + return os.path.join(directory, f) + return None + except Exception as e: + logger.error(f"Failed to find license in directory {directory} → {e}") + return 
None + +def find_dist_info_dir(root): + # Look for the .dist-info directory in the given root + try: + for item in os.listdir(root): + if item.endswith(".dist-info"): + return os.path.join(root, item) + return None + except Exception as e: + logger.error(f"Failed to find .dist-info directory in {root} → {e}") + return None + +def append_license_entry(file_path, so_names, license_text): + # Append a license entry to the given file, with proper formatting + try: + logger.info(f"Appending license to {file_path} for files: {so_names}") + if os.path.exists(file_path) and os.path.getsize(file_path) > 0: + with open(file_path, "a", encoding="utf-8") as f: + f.write(f"\n\n\n{LICENSE_SEPARATOR}\n\n\n\n") + + with open(file_path, "a", encoding="utf-8") as f: + f.write(f"Files: {', '.join(so_names)}\n") + lines = license_text.strip("\n").splitlines() + if len(lines) > 1: + f.write("\n") + f.write(license_text) + if not license_text.endswith("\n"): + f.write("\n") + else: + f.write(f"License: {license_text.strip()}\n") + except Exception as e: + logger.error(f"Failed to append license entry → {e}") + +def compute_hash_and_size(file_path): + # Compute SHA256 hash and size of the given file + try: + with open(file_path, "rb") as f: + data = f.read() + digest = hashlib.sha256(data).digest() + hash_b64 = base64.urlsafe_b64encode(digest).rstrip(b'=').decode("utf-8") + size = len(data) + return f"sha256={hash_b64}", size + except Exception as e: + logger.error(f"Failed to compute hash and size for {file_path} → {e}") + return None, None + +def update_record(dist_info_dir, file_paths): + # Update the RECORD file with new hash and size for the given file paths + try: + logger.info(f"Updating RECORD file in {dist_info_dir}") + record_file = os.path.join(dist_info_dir, "RECORD") + if not os.path.exists(record_file): + logger.warning(f"RECORD file not found at {record_file}") + return + + with open(record_file, "r", encoding="utf-8") as f: + lines = f.read().splitlines() + + record_map = 
{line.split(",")[0]: line.split(",") for line in lines} + + for path in file_paths: + if not os.path.exists(path): + continue + relative_path = os.path.relpath(path, os.path.dirname(dist_info_dir)) + hash_val, size_val = compute_hash_and_size(path) + record_map[relative_path] = [relative_path, hash_val, str(size_val)] + + with open(record_file, "w", encoding="utf-8", newline="\n") as f: + for parts in record_map.values(): + f.write(",".join(parts) + "\n") + except Exception as e: + logger.exception(f"Failed to update RECORD file → {e}") + +def process_so_file(so_path, rpm_licenses, bundled_licenses): + # Determine the original .so name and normalized name for searching + try: + logger.info(f"Processing .so file: {so_path}") + original_name = os.path.basename(so_path) + normalized_name = normalize_so_name(original_name) + + # Track if a license was successfully found + license_found = False + + for match_so in find_all_so_anywhere(normalized_name): + if not match_so: + continue # skip empty paths + + # RPM license check + pkg = get_rpm_package(match_so) + if pkg: + license_text = get_rpm_license(pkg) + if license_text: + rpm_licenses.setdefault(license_text, []).append(original_name) + license_found = True + break # stop after successfully found RPM license + else: + # RPM package exists but license not found, continue to next path + continue + + # Bundled license check + project_root = find_project_root(match_so) + if project_root: + license_file = find_license_in_directory(project_root) + if license_file: + try: + with open(license_file, "r", encoding="utf-8", errors="ignore") as f: + bundled_licenses.setdefault(f.read(), []).append(original_name) + license_found = True + break # stop after successfully read bundled license + except Exception: + # Failed to read this license file, continue to next match_so + continue + + # Fallback if no license found in any path + if not license_found: + bundled_licenses.setdefault(f"{original_name}_license_not_found", 
# Wheel version suffix utilities

def read_version_from_metadata(dist_info_dir):
    """Return the 'Version:' value from <dist-info>/METADATA, or None on failure."""
    try:
        metadata_path = os.path.join(dist_info_dir, "METADATA")
        with open(metadata_path, "r", encoding="utf-8") as f:
            for line in f:
                if line.startswith("Version:"):
                    return line.split(":", 1)[1].strip()
        raise RuntimeError("Version not found in METADATA")
    except Exception as e:
        logger.error(f"Failed to read version from METADATA → {e}")
        return None

def build_new_version(old_version, suffix):
    """Append *suffix* to *old_version* as (part of) a PEP 440 local version label.

    '1.2.3'       -> '1.2.3+<suffix>'
    '1.2.3+cu12'  -> '1.2.3+cu12<suffix>'
    Returns *old_version* unchanged on any error.
    """
    try:
        if "+" in old_version:
            base, local = old_version.split("+", 1)
            return f"{base}+{local}{suffix}"
        return f"{old_version}+{suffix}"
    except Exception as e:
        logger.error(f"Failed to build new version string → {e}")
        return old_version  # fallback to old version if error occurs

def update_metadata_version(dist_info_dir, new_version):
    """Rewrite the 'Version:' field in <dist-info>/METADATA with *new_version*."""
    try:
        metadata_path = os.path.join(dist_info_dir, "METADATA")
        with open(metadata_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
        with open(metadata_path, "w", encoding="utf-8") as f:
            for line in lines:
                if line.startswith("Version:"):
                    f.write(f"Version: {new_version}\n")
                else:
                    f.write(line)
    except Exception as e:
        logger.error(f"Failed to update version in METADATA → {e}")

def rename_dist_info_dir(extract_path, old_version, new_version):
    """Rename the .dist-info directory so its name carries *new_version*.

    Returns the new directory path, or None if no matching directory was found
    or the rename failed.
    """
    try:
        for entry in os.listdir(extract_path):
            if entry.endswith(".dist-info") and old_version in entry:
                old_path = os.path.join(extract_path, entry)
                new_entry = entry.replace(old_version, new_version)
                new_path = os.path.join(extract_path, new_entry)
                os.rename(old_path, new_path)
                return new_path
        raise RuntimeError("Failed to rename .dist-info directory")
    except Exception as e:
        logger.error(f"Failed to rename .dist-info directory → {e}")
        return None

def _hash_file(path):
    """Return the SHA-256 of *path* in wheel RECORD format, or None on failure.

    PEP 376/427 require 'sha256=' + urlsafe-base64(digest) with '=' padding
    stripped — a plain hex digest here makes pip reject the wheel on install.
    """
    import base64  # local import keeps the module's import surface unchanged
    try:
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(8192), b""):
                h.update(chunk)
        encoded = base64.urlsafe_b64encode(h.digest()).rstrip(b"=")
        return "sha256=" + encoded.decode("ascii")
    except Exception as e:
        logger.error(f"Failed to hash file {path} → {e}")
        return None

def regenerate_record(extract_path, dist_info_dir):
    """Rewrite <dist-info>/RECORD with fresh hashes and sizes for every file
    in the unpacked wheel tree rooted at *extract_path*."""
    try:
        logger.info("Regenerating RECORD file with updated hashes and sizes")
        record_path = os.path.join(dist_info_dir, "RECORD")
        records = []

        for root, _, files in os.walk(extract_path):
            for fname in files:
                full_path = os.path.join(root, fname)
                rel_path = os.path.relpath(full_path, extract_path).replace(os.sep, "/")

                if rel_path.endswith("RECORD"):
                    # RECORD must not list its own hash/size
                    records.append(f"{rel_path},,")
                    continue

                digest = _hash_file(full_path)
                if digest is None:
                    # Do not silently write 'None' into RECORD — that would
                    # produce a wheel pip cannot install.
                    raise RuntimeError(f"Could not hash {rel_path}")
                size = os.path.getsize(full_path)
                records.append(f"{rel_path},{digest},{size}")

        with open(record_path, "w", encoding="utf-8") as f:
            f.write("\n".join(records) + "\n")
    except Exception as e:
        logger.exception(f"Failed to regenerate RECORD file → {e}")

def resolve_suffix(client, package, version, wheel_name, wheel_sha256):
    """Pick the local-version suffix ('{BASE_SUFFIX}<n>') for this wheel.

    Walks n = 1, 2, ... probing COS for an already-published wheel with that
    suffix. Reuses the first free suffix, or an occupied one whose stored
    SHA-256 matches *wheel_sha256* (same artifact). Falls back to
    '{BASE_SUFFIX}1' on any unexpected error.
    """
    try:
        logger.info(f"Resolving suffix for package={package}, version={version}, wheel={wheel_name}")
        # Wheel file name: <pkg>-<ver>-<pytag>-<abitag>-<platform>.whl
        parts = wheel_name[:-4].rsplit("-", 3)
        pkg_ver = parts[0]
        remainder = "-".join(parts[1:])

        pkg, ver = pkg_ver.rsplit("-", 1)

        # Objects may live under either '<pkg>/v<version>' or '<pkg>/<version>'.
        base_keys = [
            f"{package}/v{version}",
            f"{package}/{version}"
        ]

        n = 1
        while n <= 1000:  # safety bound; realistically only a few rebuilds exist
            suffix = f"{BASE_SUFFIX}{n}"
            candidate = f"{pkg}-{ver}+{suffix}-{remainder}.whl"
            found = False
            response = None
            for base in base_keys:
                cos_key = f"{base}/{candidate}"
                logger.info(f"Checking COS object → {cos_key}")
                try:
                    response = client.head_object(
                        Bucket=COS_BUCKET,
                        Key=cos_key
                    )
                    found = True
                    break
                except Exception:
                    # head_object raises when the object does not exist
                    pass

            if not found:
                return suffix

            remote_sha = (
                response.get("Metadata", {}).get("sha256")
                or response.get("Metadata", {}).get("Sha256")
            )
            logger.info(f"Found existing object in COS with SHA256 → {remote_sha}")
            logger.info(f"Comparing COS SHA={remote_sha} with build SHA={wheel_sha256}")
            # CASE 1 - COS object exists but SHA metadata missing → reuse current suffix
            if remote_sha is None:
                logger.info("COS SHA metadata missing → reusing current suffix")
                return suffix
            # CASE 2 - SHA matches → same artifact already published, reuse suffix
            if remote_sha.strip() == wheel_sha256.strip():
                logger.info(f"SHA match → reusing suffix {suffix}")
                return suffix
            # CASE 3 - SHA mismatch → a different build owns this suffix, try next
            logger.info("SHA mismatch → trying next suffix")
            n += 1
        raise RuntimeError("Exhausted suffix candidates")
    except Exception as e:
        logger.error(f"Suffix resolution failed → {e}")
        return f"{BASE_SUFFIX}1"


def inject_classifier(dist_info):
    """Ensure CLASSIFIER is present in METADATA.

    Inserts it after the last existing 'Classifier:' line, else after the last
    'Project-URL:' line, else at the top of the file. No-op if already present.
    """
    try:
        logger.info(f"Injecting classifier into METADATA at {dist_info}")
        metadata_file = os.path.join(dist_info, "METADATA")

        with open(metadata_file, "r", encoding="utf-8") as f:
            lines = f.readlines()

        if any(CLASSIFIER in line for line in lines):
            return

        classifier_indexes = [i for i, l in enumerate(lines) if l.startswith("Classifier:")]
        project_indexes = [i for i, l in enumerate(lines) if l.startswith("Project-URL:")]

        insert_at = 0
        if classifier_indexes:
            insert_at = classifier_indexes[-1] + 1
        elif project_indexes:
            insert_at = project_indexes[-1] + 1

        lines.insert(insert_at, CLASSIFIER + "\n")

        with open(metadata_file, "w", encoding="utf-8") as f:
            f.writelines(lines)
    except Exception as e:
        logger.error(f"Failed to inject classifier → {e}")

# Main processing function
def process_wheel(wheel_path, suffix):
    """Unpack *wheel_path*, inject license files and the classifier, append
    *suffix* to the version, regenerate RECORD, and repack in place.

    Returns the path of the rewritten wheel, or None on failure.
    """
    try:
        logger.info(f"Processing wheel: {wheel_path} with suffix: {suffix}")
        wheel_dir = os.path.dirname(wheel_path)
        wheel_name = os.path.basename(wheel_path)

        old_version = None
        with tempfile.TemporaryDirectory() as tmpdir:
            # Unpack wheel
            logger.info(f"Unpacking wheel: {wheel_path}")
            subprocess.run(["wheel", "unpack", wheel_path, "-d", tmpdir], check=True)
            dirs = [d for d in os.listdir(tmpdir) if os.path.isdir(os.path.join(tmpdir, d))]
            if len(dirs) != 1:
                raise RuntimeError(f"Unexpected unpack layout. Found directories: {dirs}")
            extract_path = os.path.join(tmpdir, dirs[0])

            # License processing
            logger.info("Starting license extraction and injection")
            rpm_licenses = {}
            bundled_licenses = {}

            libs_dirs = find_libs_dirs(extract_path)

            if libs_dirs:
                logger.info(f"Found {len(libs_dirs)} .libs directories → scanning for shared libraries")
                for libs_dir in libs_dirs:
                    so_files = collect_so_files(libs_dir)
                    if not so_files:
                        logger.info(f"No .so files found in {libs_dir}")
                    for so_file in so_files:
                        process_so_file(so_file, rpm_licenses, bundled_licenses)
            else:
                logger.info(".libs directory not found, No .so files were added, skipping adding licenses")

            dist_info = find_dist_info_dir(extract_path)
            if dist_info:
                inject_classifier(dist_info)
                ubi_path = os.path.join(dist_info, "UBI_BUNDLED_LICENSES.txt")
                bundled_path = os.path.join(dist_info, "BUNDLED_LICENSES.txt")

                for license_text, files in rpm_licenses.items():
                    append_license_entry(ubi_path, files, license_text)
                for license_text, files in bundled_licenses.items():
                    append_license_entry(bundled_path, files, license_text)

                existing_license_files = [p for p in (ubi_path, bundled_path) if os.path.exists(p)]
                if existing_license_files:
                    update_record(dist_info, existing_license_files)

                # Version suffix processing — only when a version was actually read,
                # otherwise build_new_version/rename would blow up on None.
                old_version = read_version_from_metadata(dist_info)
                if old_version:
                    new_version = build_new_version(old_version, suffix)
                    update_metadata_version(dist_info, new_version)
                    dist_info = rename_dist_info_dir(extract_path, old_version, new_version)
                    if dist_info:
                        regenerate_record(extract_path, dist_info)

            # Pack wheel (must happen before the temp dir is cleaned up)
            subprocess.run(["wheel", "pack", extract_path, "-d", wheel_dir], check=True)

        # Derive the repacked file name; without a parsed version it is unchanged.
        new_wheel_name = wheel_name
        if old_version:
            if "+" in old_version:
                base, local = old_version.split("+", 1)
                new_wheel_name = wheel_name.replace(f"{base}+{local}", f"{base}+{local}{suffix}", 1)
            else:
                new_wheel_name = wheel_name.replace(old_version, f"{old_version}+{suffix}", 1)

        new_wheel_path = os.path.join(wheel_dir, new_wheel_name)
        # Only delete the input wheel when repacking produced a different file;
        # otherwise we would remove the wheel we just created.
        if new_wheel_path != wheel_path:
            os.remove(wheel_path)
        logger.info("Processing wheel completed")
        return new_wheel_path
    except Exception as e:
        logger.error(f"Failed to process wheel → {e}")
        return None

def sha256_file(path):
    """Return the SHA-256 of *path* as a hex string, or None on failure."""
    try:
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(8192), b""):
                h.update(chunk)
        return h.hexdigest()
    except Exception as e:
        logger.error(f"Failed to compute SHA256 for {path} → {e}")
        return None

def create_cos_client():
    """Create an IBM COS client from the module-level COS_* settings.

    Returns None (instead of raising) when the client cannot be built so the
    caller can decide how to fail.
    """
    try:
        return ibm_boto3.client(
            "s3",
            ibm_api_key_id=COS_API_KEY,
            ibm_service_instance_id=COS_SERVICE_INSTANCE_ID,
            config=Config(signature_version="oauth"),
            endpoint_url=COS_ENDPOINT,
        )
    except Exception as e:
        logger.error(f"Failed to create COS client → {e}")
        return None

def main():
    """CLI entry point: resolve a COS-unique suffix and rewrite the wheel."""
    if len(sys.argv) != 3:
        logger.error("Usage: python post_process_wheel.py <wheel_path> <wheel_sha256>")
        sys.exit(1)

    wheel_path = sys.argv[1]
    wheel_sha256 = sys.argv[2]

    # Resolve suffix using COS. Wheel names are '<pkg>-<ver>-...', so the
    # first two dash-separated fields give package and version.
    wheel_name = os.path.basename(wheel_path)
    parts = wheel_name.split("-")
    package = parts[0]
    version = parts[1]

    client = create_cos_client()
    if client is None:
        logger.error("COS client creation failed")
        sys.exit(1)

    suffix = resolve_suffix(
        client,
        package,
        version,
        wheel_name,
        wheel_sha256
    )

    new_wheel = process_wheel(wheel_path, suffix)
    if new_wheel is None:
        # Propagate failure to CI instead of reporting success with no wheel.
        logger.error("Wheel post-processing failed")
        sys.exit(1)
    logger.info(f"Wheel updated: {new_wheel}")

if __name__ == "__main__":
    main()