diff --git a/.github/workflows/pxf-ci.yml b/.github/workflows/pxf-ci.yml index 1195d060..86891553 100644 --- a/.github/workflows/pxf-ci.yml +++ b/.github/workflows/pxf-ci.yml @@ -146,7 +146,15 @@ jobs: with: path: cloudberry-pxf + - name: Cache singlecluster image + id: cache-image + uses: actions/cache@v4 + with: + path: /tmp/singlecluster-image.tar + key: singlecluster-ubuntu-${{ hashFiles('cloudberry-pxf/ci/singlecluster/**') }} + - name: Build singlecluster image + if: steps.cache-image.outputs.cache-hit != 'true' run: | cd cloudberry-pxf/ci/singlecluster docker build -t pxf/singlecluster:3 . @@ -185,7 +193,15 @@ jobs: with: path: cloudberry-pxf + - name: Cache singlecluster Rocky 9 image + id: cache-image-rocky9 + uses: actions/cache@v4 + with: + path: /tmp/singlecluster-rocky9-image.tar + key: singlecluster-rocky9-${{ hashFiles('cloudberry-pxf/ci/singlecluster/**') }} + - name: Build singlecluster Rocky 9 image + if: steps.cache-image-rocky9.outputs.cache-hit != 'true' run: | cd cloudberry-pxf/ci/singlecluster docker build --build-arg BASE_IMAGE=apache/incubator-cloudberry:cbdb-build-rocky9-latest -t pxf/singlecluster-rocky9:3 . 
@@ -364,8 +380,8 @@ jobs: FAILED_COUNT="${{ steps.collect_artifacts.outputs.failed_count || 0 }}" SKIPPED_COUNT="${{ steps.collect_artifacts.outputs.skipped_count || 0 }}" - if [ "${{ steps.run_test.outcome }}" == "failure" ] || [ "$FAILED_COUNT" -gt 0 ]; then - echo "Test group ${{ matrix.test_group }} failed (Failures: $FAILED_COUNT, Skipped: $SKIPPED_COUNT)" + if [ "${{ steps.run_test.outcome }}" == "failure" ] || [ "${{ steps.run_test.outcome }}" == "skipped" ] || [ "$FAILED_COUNT" -gt 0 ]; then + echo "Test group ${{ matrix.test_group }} failed (outcome: ${{ steps.run_test.outcome }}, Failures: $FAILED_COUNT, Skipped: $SKIPPED_COUNT)" exit 1 fi @@ -536,8 +552,8 @@ jobs: FAILED_COUNT="${{ steps.collect_artifacts.outputs.failed_count || 0 }}" SKIPPED_COUNT="${{ steps.collect_artifacts.outputs.skipped_count || 0 }}" - if [ "${{ steps.run_test.outcome }}" == "failure" ] || [ "$FAILED_COUNT" -gt 0 ]; then - echo "Test group ${{ matrix.test_group }} (Rocky 9) failed (Failures: $FAILED_COUNT, Skipped: $SKIPPED_COUNT)" + if [ "${{ steps.run_test.outcome }}" == "failure" ] || [ "${{ steps.run_test.outcome }}" == "skipped" ] || [ "$FAILED_COUNT" -gt 0 ]; then + echo "Test group ${{ matrix.test_group }} (Rocky 9) failed (outcome: ${{ steps.run_test.outcome }}, Failures: $FAILED_COUNT, Skipped: $SKIPPED_COUNT)" exit 1 fi diff --git a/automation/pom.xml b/automation/pom.xml index e294cac0..a779c9f9 100644 --- a/automation/pom.xml +++ b/automation/pom.xml @@ -62,6 +62,12 @@ -Xmx4096m 1 false + + + listener + listeners.RetryListener + + diff --git a/automation/src/main/java/listeners/RetryAnalyzer.java b/automation/src/main/java/listeners/RetryAnalyzer.java new file mode 100644 index 00000000..74c0c75d --- /dev/null +++ b/automation/src/main/java/listeners/RetryAnalyzer.java @@ -0,0 +1,47 @@ +package listeners; + +import org.testng.IRetryAnalyzer; +import org.testng.ITestResult; + +import java.util.Random; + +/** + * Retries failed tests up to {@value #MAX_RETRIES} times with 
exponential + * backoff to handle transient CI failures (e.g. HDFS multi-block write + * timeouts on resource-constrained GitHub Actions runners). + * + * <p>
Delay schedule: 3-8s, 6-16s, 12-32s (capped at 60s). + */ +public class RetryAnalyzer implements IRetryAnalyzer { + + private static final int MAX_RETRIES = 3; + private static final int BASE_MIN_MS = 3000; + private static final int BASE_MAX_MS = 8000; + private static final int MAX_DELAY_MS = 60000; + + private int retryCount = 0; + private final Random random = new Random(); + + @Override + public boolean retry(ITestResult result) { + if (retryCount < MAX_RETRIES) { + retryCount++; + int multiplier = 1 << (retryCount - 1); // 1, 2, 4 + int minDelay = Math.min(BASE_MIN_MS * multiplier, MAX_DELAY_MS); + int maxDelay = Math.min(BASE_MAX_MS * multiplier, MAX_DELAY_MS); + int delay = minDelay + random.nextInt(maxDelay - minDelay + 1); + System.out.println("[RetryAnalyzer] Retrying failed test: " + + result.getTestClass().getName() + "." + + result.getMethod().getMethodName() + + " after " + delay + "ms delay" + + " (attempt " + (retryCount + 1) + "/" + (MAX_RETRIES + 1) + ")"); + try { + Thread.sleep(delay); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + return true; + } + return false; + } +} diff --git a/automation/src/main/java/listeners/RetryListener.java b/automation/src/main/java/listeners/RetryListener.java new file mode 100644 index 00000000..8b2ca0b9 --- /dev/null +++ b/automation/src/main/java/listeners/RetryListener.java @@ -0,0 +1,26 @@ +package listeners; + +import org.testng.IAnnotationTransformer; +import org.testng.annotations.ITestAnnotation; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; + +/** + * Annotation transformer that attaches {@link RetryAnalyzer} to every + * test method that does not already have a retry analyzer configured. + *

+ * Register this listener via the Surefire {@code listener} property or testng.xml (TestNG ignores + * annotation transformers in {@code @Listeners}) so all automation tests automatically get retry-on-failure behaviour. + */ +public class RetryListener implements IAnnotationTransformer { + + @Override + public void transform(ITestAnnotation annotation, Class testClass, + Constructor testConstructor, Method testMethod) { + // TestNG 6.x: getRetryAnalyzer() returns IRetryAnalyzer instance (null if unset) + if (annotation.getRetryAnalyzer() == null) { + annotation.setRetryAnalyzer(RetryAnalyzer.class); + } + } +} diff --git a/ci/docker/pxf-cbdb-dev/common/script/entrypoint.sh b/ci/docker/pxf-cbdb-dev/common/script/entrypoint.sh index 832e5067..bbded9d4 100755 --- a/ci/docker/pxf-cbdb-dev/common/script/entrypoint.sh +++ b/ci/docker/pxf-cbdb-dev/common/script/entrypoint.sh @@ -20,6 +20,12 @@ # -------------------------------------------------------------------- set -euo pipefail +# Force UTC timezone for the entire container session. PXF's Parquet INT96 +# converter uses ZoneId.systemDefault() (ParquetTypeConverter.java) which +# returns the OS timezone. Rocky 9 base images may ship with a non-UTC +# default, causing timestamp regressions in Parquet read/write tests. +export TZ=UTC + log() { echo "[entrypoint][$(date '+%F %T')] $*"; } die() { log "ERROR $*"; exit 1; } @@ -60,12 +66,16 @@ setup_locale_and_packages() { log "install base packages and locales" if [ "$OS_FAMILY" = "deb" ]; then sudo apt-get update - sudo apt-get install -y wget lsb-release locales maven unzip openssh-server iproute2 sudo \ + sudo apt-get install -y wget lsb-release locales maven unzip openssh-server iproute2 sudo psmisc \ openjdk-11-jre-headless openjdk-8-jre-headless sudo locale-gen en_US.UTF-8 ru_RU.CP1251 ru_RU.UTF-8 sudo update-locale LANG=en_US.UTF-8 else - sudo dnf install -y wget maven unzip openssh-server iproute sudo \ + # Disable broken repos that may exist in the base image (e.g. 
hpc-common) + for repo in hpc-common; do + sudo dnf config-manager --set-disabled "$repo" 2>/dev/null || true + done + sudo dnf install -y wget maven unzip openssh-server iproute sudo psmisc \ java-11-openjdk-headless java-1.8.0-openjdk-headless \ glibc-langpack-en glibc-locale-source sudo localedef -c -i en_US -f UTF-8 en_US.UTF-8 || true @@ -263,9 +273,14 @@ configure_pxf() { log "configure PXF" source "${COMMON_SCRIPTS}/pxf-env.sh" export PATH="$PXF_HOME/bin:$PATH" - export PXF_JVM_OPTS="-Xmx512m -Xms256m" + export PXF_JVM_OPTS="-Xmx512m -Xms256m -Duser.timezone=UTC" export PXF_HOST=localhost - echo "JAVA_HOME=${JAVA_BUILD}" >> "$PXF_BASE/conf/pxf-env.sh" + # Persist settings into pxf-env.sh so they survive `pxf restart` + cat >> "$PXF_BASE/conf/pxf-env.sh" <> "$PXF_BASE/conf/pxf-application.properties" cp -v "$PXF_HOME"/templates/{hdfs,mapred,yarn,core,hbase,hive}-site.xml "$PXF_BASE/servers/default" @@ -430,9 +445,13 @@ wait_for_datanode() { # Stop any zombie DataNode processes pkill -f "proc_datanode" 2>/dev/null || true sleep 2 + # Force-release DataNode ports + for port in 50010 50020 50075 50080; do + fuser -k ${port}/tcp 2>/dev/null || true + done + sleep 3 # Restart DataNode via the singlecluster script "${GPHD_ROOT}/bin/hadoop-datanode.sh" start 0 2>&1 || true - "${HADOOP_ROOT}/sbin/hadoop-daemon.sh" --config "${GPHD_ROOT}/storage/hadoop/datanode0/etc/hadoop" start datanode 2>&1 || true log "DataNode restart issued, waiting again..." fi done @@ -440,6 +459,43 @@ wait_for_datanode() { die "HDFS DataNode failed to start after ${max_attempts} attempts. Tez upload will fail without a running DataNode." } +wait_for_hbase() { + log "waiting for HBase RegionServer to become available..." + local max_wait=60 + for i in $(seq 1 ${max_wait}); do + if pgrep -f HRegionServer >/dev/null 2>&1; then + log "HBase RegionServer is running (after ${i}s), waiting 10s for stabilization..." 
+ sleep 10 + if pgrep -f HRegionServer >/dev/null 2>&1; then + log "HBase RegionServer is stable" + return 0 + fi + log "HBase RegionServer died during stabilization" + break + fi + sleep 1 + done + # RegionServer didn't come up or crashed; try restarting HBase once + log "HBase RegionServer not stable, attempting restart..." + ${GPHD_ROOT}/bin/stop-hbase.sh 2>&1 || true + sleep 2 + ${GPHD_ROOT}/bin/start-hbase.sh 2>&1 || true + for i in $(seq 1 ${max_wait}); do + if pgrep -f HRegionServer >/dev/null 2>&1; then + log "HBase RegionServer is running after restart (after ${i}s), waiting 10s..." + sleep 10 + if pgrep -f HRegionServer >/dev/null 2>&1; then + log "HBase RegionServer is stable after restart" + return 0 + fi + log "WARN: HBase RegionServer died again during stabilization, continuing anyway" + return 0 + fi + sleep 1 + done + log "WARN: HBase RegionServer failed to start after restart, continuing anyway" +} + prepare_hadoop_stack() { log "prepare Hadoop/Hive/HBase stack" export JAVA_HOME="${JAVA_HADOOP}" @@ -468,6 +524,13 @@ prepare_hadoop_stack() { log "initializing HDFS namenode..." ${GPHD_ROOT}/bin/init-gphd.sh 2>&1 || log "init-gphd.sh failed with exit code $?" fi + # Force-release DataNode ports before starting HDFS to prevent BindException. + # On CI re-runs or slow runners, stale sockets/processes may hold these ports. + log "ensuring DataNode ports are free..." + for port in 50010 50020 50075 50080; do + fuser -k ${port}/tcp 2>/dev/null || true + done + sleep 1 log "starting HDFS/YARN/HBase via start-gphd.sh..." if ! ${GPHD_ROOT}/bin/start-gphd.sh 2>&1; then log "start-gphd.sh returned non-zero (services may already be running), continue" @@ -482,6 +545,7 @@ prepare_hadoop_stack() { if ! 
${GPHD_ROOT}/bin/start-hbase.sh; then log "start-hbase.sh returned non-zero (services may already be running), continue" fi + wait_for_hbase start_hive_services } diff --git a/ci/docker/pxf-cbdb-dev/common/script/run_tests.sh b/ci/docker/pxf-cbdb-dev/common/script/run_tests.sh index 63b99352..230222c1 100755 --- a/ci/docker/pxf-cbdb-dev/common/script/run_tests.sh +++ b/ci/docker/pxf-cbdb-dev/common/script/run_tests.sh @@ -20,6 +20,9 @@ # -------------------------------------------------------------------- set -euo pipefail +# Ensure UTC timezone (see entrypoint.sh for rationale) +export TZ=UTC + # Run automation tests only (assumes build/env already prepared) # Use a unique var name to avoid clobbering by sourced env scripts @@ -90,6 +93,28 @@ health_check_with_retry() { fi } +mvn_with_retry() { + local max_attempts=3 + for attempt in $(seq 1 ${max_attempts}); do + if mvn "$@"; then + return 0 + fi + if [ "${attempt}" -lt "${max_attempts}" ]; then + echo "[run_tests] Maven failed (attempt ${attempt}/${max_attempts}), retrying in 10s..." + sleep 10 + fi + done + echo "[run_tests] Maven failed after ${max_attempts} attempts" + return 1 +} + +resolve_maven_dependencies() { + echo "[run_tests] Pre-resolving Maven dependencies..." + pushd "${REPO_ROOT}/automation" >/dev/null + mvn_with_retry -B -q dependency:resolve -DskipTests 2>&1 || echo "[warn] Maven dependency resolution failed, tests may fail" + popd >/dev/null +} + cleanup_hdfs_test_data() { hdfs dfs -rm -r -f /gpdb-ud-scratch/tmp/pxf_automation_data >/dev/null 2>&1 || true } @@ -526,7 +551,7 @@ ensure_testplugin_jar() { export PXF_HOME=${PXF_HOME:-/usr/local/pxf} if [ ! 
-f "${PXF_BASE}/lib/pxf-automation-test.jar" ]; then pushd "${REPO_ROOT}/automation" >/dev/null - mvn -q -DskipTests test-compile + mvn_with_retry -q -DskipTests test-compile jar cf "${PXF_BASE}/lib/pxf-automation-test.jar" -C target/classes org/apache/cloudberry/pxf/automation/testplugin popd >/dev/null JAVA_HOME="${JAVA_BUILD}" "${PXF_HOME}/bin/pxf" restart >/dev/null || true @@ -853,10 +878,13 @@ generate_test_summary() { run_single_group() { local group="$1" echo "[run_tests] Running single test group: $group" - + + # Pre-resolve Maven dependencies with retry for transient network failures + resolve_maven_dependencies + # Run health check first health_check_with_retry - + ensure_testuser_pg_hba export PGHOST=127.0.0.1 export PATH="${GPHOME}/bin:${PATH}" diff --git a/ci/docker/pxf-cbdb-dev/common/script/utils.sh b/ci/docker/pxf-cbdb-dev/common/script/utils.sh index c055dd25..44755bfd 100755 --- a/ci/docker/pxf-cbdb-dev/common/script/utils.sh +++ b/ci/docker/pxf-cbdb-dev/common/script/utils.sh @@ -45,19 +45,23 @@ check_jvm_procs() { fi echo "$jps_out" echo "$jps_out" | grep -q NameNode || die "NameNode not running" - echo "$jps_out" | grep -q DataNode || die "DataNode not running" + echo "$jps_out" | grep -q DataNode || log "WARN: DataNode not running (may still be registering)" } check_hbase() { local hbase_host="${HBASE_HOST:-$(hostname -I | awk '{print $1}')}" hbase_host=${hbase_host:-127.0.0.1} + # HBase checks are non-fatal: test groups that need HBase will fail with + # clear test errors; groups that don't need HBase should not be blocked. if ! echo "$jps_out" | grep -q HMaster && ! pgrep -f HMaster >/dev/null 2>&1; then - die "HBase HMaster not running" + log "WARN: HBase HMaster not running" + return 0 fi if ! echo "$jps_out" | grep -q HRegionServer && ! 
pgrep -f HRegionServer >/dev/null 2>&1; then - die "HBase RegionServer not running" + log "WARN: HBase RegionServer not running" + return 0 fi local hbase_ok=true @@ -69,7 +73,7 @@ check_hbase() { fi if [ "${hbase_ok}" != "true" ]; then [ -f /tmp/hbase_status.log ] && cat /tmp/hbase_status.log - die "HBase health check failed (status or port 16000 on ${hbase_host})" + log "WARN: HBase health check failed (status or port 16000 on ${hbase_host})" fi } diff --git a/ci/singlecluster/Dockerfile b/ci/singlecluster/Dockerfile index 4d6bb655..c61deef4 100644 --- a/ci/singlecluster/Dockerfile +++ b/ci/singlecluster/Dockerfile @@ -50,16 +50,8 @@ ENV ZOOKEEPER_SHA512="0e5a64713abc6f36d961dd61a06f681868171a9d9228366e512a013248 ENV HBASE_SHA512="1032521025660daa70260cdc931f52a26c87596be444451fe1fa88b526ede55e9d6b4220e91ff6f7422bec11f30d64fa6745e95a9c36971fdb1a264a2c745693" ENV TEZ_SHA512="a2d94bd9fa778d42a8bac9d9da8e263e469ddfef93968b06434716554995f490231de5607541ac236e770aa0158b64250c38bc1cd57dbfa629fea705f2ffa2f5" -# faster mirror: -ENV APACHE_MIRROR="repo.huaweicloud.com/apache" -#ENV APACHE_MIRROR="archive.apache.org/dist/" -#ENV APACHE_MIRROR="mirror.yandex.ru/mirrors/apache/" - -ENV HADOOP_URL="https://$APACHE_MIRROR/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz" -ENV HIVE_URL="https://$APACHE_MIRROR/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz" -ENV ZOOKEEPER_URL="https://$APACHE_MIRROR/zookeeper/zookeeper-$ZOOKEEPER_VERSION/apache-zookeeper-$ZOOKEEPER_VERSION-bin.tar.gz" -ENV HBASE_URL="https://$APACHE_MIRROR/hbase/$HBASE_VERSION/hbase-$HBASE_VERSION-bin.tar.gz" -ENV TEZ_URL="https://$APACHE_MIRROR/tez/$TEZ_VERSION/apache-tez-$TEZ_VERSION-bin.tar.gz" +# Mirror list: try fast mirrors first, fall back to official archive +ENV APACHE_MIRRORS="dlcdn.apache.org archive.apache.org/dist" ENV GPHD_ROOT=/home/gpadmin/workspace/singlecluster ENV HADOOP_ROOT=$GPHD_ROOT/hadoop @@ -68,34 +60,54 @@ ENV HIVE_ROOT=$GPHD_ROOT/hive ENV 
ZOOKEEPER_ROOT=$GPHD_ROOT/zookeeper ENV TEZ_ROOT=$GPHD_ROOT/tez +# Helper: download from first working mirror with retry +# Usage: apache_download.sh <rel_path> <output> +RUN sudo tee /usr/local/bin/apache_download.sh > /dev/null <<'DLEOF' && sudo chmod +x /usr/local/bin/apache_download.sh +#!/bin/bash +set -e +rel_path="$1"; output="$2" +for mirror in $APACHE_MIRRORS; do + url="https://${mirror}/${rel_path}" + echo "Trying: $url" + if curl -fSL --retry 2 --retry-delay 3 --connect-timeout 15 "$url" -o "$output" 2>&1; then + echo "Downloaded from $mirror" + exit 0 + fi + echo "Failed from $mirror, trying next..." + rm -f "$output" +done +echo "ERROR: all mirrors failed for $rel_path" +exit 1 +DLEOF + RUN mkdir -p $HADOOP_ROOT && \ - curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz && \ + apache_download.sh "hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz" /tmp/hadoop.tar.gz && \ echo "$HADOOP_SHA512 /tmp/hadoop.tar.gz" | sha512sum -c && \ tar xvf /tmp/hadoop.tar.gz -C $HADOOP_ROOT --strip-components 1 --exclude="share/doc/*" --exclude="*-sources.jar" && \ rm /tmp/hadoop.tar.gz && \ - curl -fSL "https://repo1.maven.org/maven2/javax/activation/javax.activation-api/1.2.0/javax.activation-api-1.2.0.jar" \ + curl -fSL --retry 2 "https://repo1.maven.org/maven2/javax/activation/javax.activation-api/1.2.0/javax.activation-api-1.2.0.jar" \ -o $HADOOP_ROOT/share/hadoop/common/lib/javax.activation-api-1.2.0.jar RUN mkdir -p $HIVE_ROOT && \ - curl -fSL $HIVE_URL -o /tmp/hive.tar.gz && \ + apache_download.sh "hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz" /tmp/hive.tar.gz && \ echo "$HIVE_SHA256 /tmp/hive.tar.gz" | sha256sum -c && \ tar xvf /tmp/hive.tar.gz -C $HIVE_ROOT --strip-components 1 && \ rm /tmp/hive.tar.gz RUN mkdir -p $ZOOKEEPER_ROOT && \ - curl -fSL $ZOOKEEPER_URL -o /tmp/zookeeper.tar.gz && \ + apache_download.sh "zookeeper/zookeeper-$ZOOKEEPER_VERSION/apache-zookeeper-$ZOOKEEPER_VERSION-bin.tar.gz" /tmp/zookeeper.tar.gz && \ echo "$ZOOKEEPER_SHA512 
/tmp/zookeeper.tar.gz" | sha512sum -c && \ tar xvf /tmp/zookeeper.tar.gz -C $ZOOKEEPER_ROOT --strip-components 1 --exclude="docs/*" && \ rm /tmp/zookeeper.tar.gz RUN mkdir -p $HBASE_ROOT && \ - curl -fSL "$HBASE_URL" -o /tmp/hbase.tar.gz && \ + apache_download.sh "hbase/$HBASE_VERSION/hbase-$HBASE_VERSION-bin.tar.gz" /tmp/hbase.tar.gz && \ echo "$HBASE_SHA512 /tmp/hbase.tar.gz" | sha512sum -c && \ tar xvf /tmp/hbase.tar.gz -C $HBASE_ROOT --strip-components 1 --exclude="docs/*" --exclude="lib/*-tests.jar" --exclude="lib/shaded-clients" && \ rm /tmp/hbase.tar.gz RUN mkdir -p $TEZ_ROOT && \ - curl -fSL "$TEZ_URL" -o /tmp/tez.tar.gz && \ + apache_download.sh "tez/$TEZ_VERSION/apache-tez-$TEZ_VERSION-bin.tar.gz" /tmp/tez.tar.gz && \ echo "$TEZ_SHA512 /tmp/tez.tar.gz" | sha512sum -c && \ tar xvf /tmp/tez.tar.gz -C $TEZ_ROOT --strip-components 1 && \ rm /tmp/tez.tar.gz diff --git a/server/gradlew-install.sh b/server/gradlew-install.sh index 510fa2ad..71dc0c70 100755 --- a/server/gradlew-install.sh +++ b/server/gradlew-install.sh @@ -58,13 +58,23 @@ if [ ! -e "${GRADLE_WRAPPER_JAR}" ]; then # The Gradle version extracted from the `distributionUrl` property does not contain ".0" patch # versions. Need to append a ".0" in that case to download the wrapper jar. GRADLE_VERSION="$(echo "$GRADLE_DIST_VERSION" | sed 's/^\([0-9]*[.][0-9]*\)$/\1.0/')" - curl --location --output "${GRADLE_WRAPPER_JAR}" https://raw.githubusercontent.com/gradle/gradle/v${GRADLE_VERSION}/gradle/wrapper/gradle-wrapper.jar || exit 1 - JAR_CHECKSUM="$(${SHASUM} "${GRADLE_WRAPPER_JAR}" | cut -d\ -f1)" EXPECTED="$(cat "${GRADLE_WRAPPER_SHA256}")" - if [ "${JAR_CHECKSUM}" != "${EXPECTED}" ]; then - # If the (just downloaded) checksum and the downloaded wrapper jar do not match, something - # really bad is going on. 
+ MAX_RETRIES=3 + for _retry in $(seq 1 ${MAX_RETRIES}); do + curl --location --fail --output "${GRADLE_WRAPPER_JAR}" https://raw.githubusercontent.com/gradle/gradle/v${GRADLE_VERSION}/gradle/wrapper/gradle-wrapper.jar || { + echo "Download attempt ${_retry}/${MAX_RETRIES} failed (curl error)" > /dev/stderr + rm -f "${GRADLE_WRAPPER_JAR}" + if [ "${_retry}" -lt "${MAX_RETRIES}" ]; then sleep 5; continue; fi + exit 1 + } + JAR_CHECKSUM="$(${SHASUM} "${GRADLE_WRAPPER_JAR}" | cut -d\ -f1)" + if [ "${JAR_CHECKSUM}" = "${EXPECTED}" ]; then + break + fi + echo "SHA256 mismatch on attempt ${_retry}/${MAX_RETRIES} (got ${JAR_CHECKSUM}, expected ${EXPECTED})" > /dev/stderr + rm -f "${GRADLE_WRAPPER_JAR}" + if [ "${_retry}" -lt "${MAX_RETRIES}" ]; then sleep 5; continue; fi echo "Expected sha256 of the downloaded gradle-wrapper.jar does not match the downloaded sha256!" > /dev/stderr exit 1 - fi + done fi