Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions .github/workflows/fuzz.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
name: Fuzz

concurrency:
# The group causes runs to queue instead of running in parallel.
group: fuzz
# This ensures each run builds on the previous run's corpus discoveries rather than losing them to
# failed compare-and-swap uploads.
cancel-in-progress: false
# No concurrency group — runs are allowed to overlap. The safe corpus merge-upload
# script ensures overlapping runs cannot clobber each other's corpus discoveries.

on:
schedule:
- cron: "0 * * * *" # every hour
- cron: "0 */6 * * *" # every 6 hours
workflow_dispatch: { }

jobs:
Expand Down
5 changes: 2 additions & 3 deletions .github/workflows/minimize_fuzz_corpus.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
name: Minimize All Fuzz Corpora

concurrency:
group: fuzz
cancel-in-progress: false
# No concurrency group — runs are allowed to overlap. The safe corpus merge-upload
# script ensures overlapping runs cannot clobber each other's corpus discoveries.


on:
schedule:
Expand Down
19 changes: 14 additions & 5 deletions .github/workflows/minimize_fuzz_corpus_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ jobs:
exit 0
fi

- name: Record original corpus files
run: |
CORPUS_DIR="fuzz/corpus/${{ inputs.fuzz_target }}"
ls "$CORPUS_DIR/" | sort > /tmp/original_files.txt
echo "Original corpus: $(wc -l < /tmp/original_files.txt) files"

- name: Minimize corpus
run: |
FEATURES_FLAG=""
Expand All @@ -94,18 +100,21 @@ jobs:
mkdir -p "$MINIMIZED_DIR"
cargo +$NIGHTLY_TOOLCHAIN fuzz cmin $FEATURES_FLAG \
${{ inputs.fuzz_target }} "$CORPUS_DIR" -- "$MINIMIZED_DIR"
# Replace original with minimized for upload
rm -rf "$CORPUS_DIR"
mv "$MINIMIZED_DIR" "$CORPUS_DIR"

- name: Persist corpus
- name: Merge and persist minimized corpus
shell: bash
env:
AWS_ACCESS_KEY_ID: ${{ secrets.R2_FUZZ_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_FUZZ_SECRET_ACCESS_KEY }}
AWS_REGION: "us-east-1"
AWS_ENDPOINT_URL: "https://01e9655179bbec953276890b183039bc.r2.cloudflarestorage.com"
run: |
CORPUS_KEY="${{ inputs.fuzz_target }}_corpus.tar.zst"
CORPUS_DIR="fuzz/corpus/${{ inputs.fuzz_target }}"
tar -acf "$CORPUS_KEY" "$CORPUS_DIR"
python3 scripts/s3-upload.py --bucket vortex-fuzz-corpus --key "$CORPUS_KEY" --body "$CORPUS_KEY" --checksum-algorithm CRC32
python3 scripts/s3-corpus-merge-upload.py \
--bucket vortex-fuzz-corpus \
--key "${{ inputs.fuzz_target }}_corpus.tar.zst" \
--corpus-dir "fuzz/corpus/${{ inputs.fuzz_target }}" \
--original-snapshot /tmp/original_files.txt \
--checksum-algorithm CRC32
92 changes: 81 additions & 11 deletions .github/workflows/run-fuzzer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ on:
description: "Maximum fuzzing time in seconds"
required: false
type: number
default: 2700
default: 18000
runner:
description: "Runner name from .github-private runs-on.yml (e.g., arm64-medium, gpu)"
required: false
Expand Down Expand Up @@ -61,7 +61,7 @@ jobs:
name: "Run ${{ inputs.fuzz_name || inputs.fuzz_target }}"
env:
FUZZ_NAME: ${{ inputs.fuzz_name || inputs.fuzz_target }}
timeout-minutes: 240 # 4 hours
timeout-minutes: 370 # 6 hours 10 minutes
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& format('runs-on={0}/runner={1}/disk=large/tag={2}-fuzz', github.run_id, inputs.runner, inputs.fuzz_name || inputs.fuzz_target)
Expand Down Expand Up @@ -113,6 +113,45 @@ jobs:
mkdir -p "$CORPUS_DIR"
fi

- name: Capture GPU diagnostics (pre-run)
if: inputs.runner == 'gpu' || contains(inputs.extra_features, 'cuda')
shell: bash
run: |
{
echo "===== GPU diagnostics (pre-run) ====="
echo "phase=pre-run"
echo "timestamp=$(date -u --iso-8601=seconds)"
echo "hostname=$(hostname)"
echo "uname=$(uname -a)"
echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-<unset>}"
echo "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES:-<unset>}"
echo
echo "## CUDA/NVIDIA environment"
env | sort | grep -E '^(CUDA|NVIDIA|LD_LIBRARY_PATH|LIBRARY_PATH|PATH=|CARGO_TARGET_|RUSTFLAGS|RUSTC_WRAPPER|ASAN_OPTIONS|UBSAN_OPTIONS|LSAN_OPTIONS|TSAN_OPTIONS)=' || true
echo
echo "## nvcc location"
which nvcc || true
echo
echo "## nvcc --version"
nvcc --version || true
echo
echo "## nvidia-smi"
nvidia-smi || true
echo
echo "## nvidia-smi -L"
nvidia-smi -L || true
echo
echo "## GPU memory details"
nvidia-smi -q -d Memory || true
echo
echo "## GPU summary"
nvidia-smi --query-gpu=index,uuid,name,driver_version,memory.total,memory.used,memory.free,utilization.gpu,temperature.gpu --format=csv || true
echo
echo "## Active compute processes"
nvidia-smi --query-compute-apps=gpu_uuid,pid,process_name,used_memory --format=csv,noheader || true
echo
}

- name: Run fuzzing target
id: fuzz
run: |
Expand All @@ -129,9 +168,36 @@ jobs:
$FEATURES_FLAG \
${{ inputs.fuzz_target }} -- \
$FORK_FLAG -max_total_time=${{ inputs.max_time }} -rss_limit_mb=0 \
2>&1 | tee fuzz_output.log
2>&1 | tee -a fuzz_output.log
continue-on-error: true

- name: Capture GPU diagnostics (post-run)
if: always() && (inputs.runner == 'gpu' || contains(inputs.extra_features, 'cuda'))
shell: bash
run: |
{
echo "===== GPU diagnostics (post-run) ====="
echo "phase=post-run"
echo "timestamp=$(date -u --iso-8601=seconds)"
echo "hostname=$(hostname)"
echo "uname=$(uname -a)"
echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-<unset>}"
echo "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES:-<unset>}"
echo
echo "## nvidia-smi"
nvidia-smi || true
echo
echo "## GPU memory details"
nvidia-smi -q -d Memory || true
echo
echo "## GPU summary"
nvidia-smi --query-gpu=index,uuid,name,driver_version,memory.total,memory.used,memory.free,utilization.gpu,temperature.gpu --format=csv || true
echo
echo "## Active compute processes"
nvidia-smi --query-compute-apps=gpu_uuid,pid,process_name,used_memory --format=csv,noheader || true
echo
}

- name: Check for crashes
id: check
run: |
Expand Down Expand Up @@ -168,7 +234,7 @@ jobs:
$FEATURES_FLAG \
${{ inputs.fuzz_target }} \
"${{ steps.check.outputs.first_crash }}" \
2>&1 | tee fuzz_output.log || true
2>&1 | tee -a fuzz_output.log || true

- name: Archive crash artifacts
id: upload_artifacts
Expand All @@ -187,20 +253,24 @@ jobs:
path: fuzz_output.log
retention-days: 90

- name: Persist corpus
- name: Merge and persist corpus
if: always()
shell: bash
env:
AWS_ACCESS_KEY_ID: ${{ secrets.R2_FUZZ_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_FUZZ_SECRET_ACCESS_KEY }}
AWS_REGION: "us-east-1"
AWS_ENDPOINT_URL: "https://01e9655179bbec953276890b183039bc.r2.cloudflarestorage.com"
run: |
CORPUS_KEY="${FUZZ_NAME}_corpus.tar.zst"
CORPUS_DIR="fuzz/corpus/${FUZZ_NAME}"

tar -acf "$CORPUS_KEY" "$CORPUS_DIR"

python3 scripts/s3-upload.py --bucket vortex-fuzz-corpus --key "$CORPUS_KEY" --body "$CORPUS_KEY" --checksum-algorithm CRC32 --optimistic-lock
if [ "${GITHUB_REF}" != "refs/heads/develop" ]; then
echo "Skipping corpus upload for ${GITHUB_REF}; only develop updates the shared fuzz corpus."
exit 0
fi
python3 scripts/s3-corpus-merge-upload.py \
--bucket vortex-fuzz-corpus \
--key "${FUZZ_NAME}_corpus.tar.zst" \
--corpus-dir "fuzz/corpus/${FUZZ_NAME}" \
--checksum-algorithm CRC32

- name: Fail job if fuzz run found a bug
if: steps.check.outputs.crashes_found == 'true'
Expand Down
144 changes: 144 additions & 0 deletions fuzz/fuzz_targets/compress_gpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,156 @@
#![no_main]
#![expect(clippy::unwrap_used)]

use std::env;
use std::fs;
use std::process::Command;
use std::sync::OnceLock;

use libfuzzer_sys::Corpus;
use libfuzzer_sys::fuzz_target;
use vortex_error::vortex_panic;
use vortex_fuzz::FuzzCompressGpu;
use vortex_fuzz::run_compress_gpu;

static STARTUP_DIAGNOSTICS: OnceLock<()> = OnceLock::new();

/// Runs `command` with `args` and dumps its exit status, stdout, and stderr
/// to our stderr under a `## {label}` heading. A failure to spawn the command
/// is reported rather than propagated.
fn log_command(label: &str, command: &str, args: &[&str]) {
    eprintln!("## {label}");
    let output = match Command::new(command).args(args).output() {
        Ok(output) => output,
        Err(err) => {
            eprintln!("failed to run `{command}`: {err}");
            return;
        }
    };

    eprintln!("status: {}", output.status);
    // Print each captured stream only when it has non-whitespace content.
    for (stream, bytes) in [("stdout", &output.stdout), ("stderr", &output.stderr)] {
        let text = String::from_utf8_lossy(bytes);
        if !text.trim().is_empty() {
            eprintln!("{stream}:\n{text}");
        }
    }
}

/// Logs a filtered `ps` listing of processes that look related to this fuzz
/// run: lines whose pid/ppid/pgid (or any other whole field) equals `pid`,
/// plus anything mentioning the fuzz target, cargo, or libFuzzer.
///
/// Spawn failures for `ps` are reported to stderr rather than propagated.
fn log_relevant_processes(pid: u32) {
    eprintln!("## relevant processes");
    match Command::new("ps")
        .args(["-Ao", "pid,ppid,pgid,comm,args"])
        .output()
    {
        Ok(output) => {
            eprintln!("status: {}", output.status);
            let pid = pid.to_string();
            let stdout = String::from_utf8_lossy(&output.stdout);
            for line in stdout.lines().filter(|line| {
                // Match the pid only as a whole whitespace-separated field.
                // A plain substring check (`line.contains(&pid)`) would also
                // match unrelated processes, e.g. pid 1234 when looking for
                // 123, or any argument containing the digits.
                line.split_whitespace().any(|field| field == pid)
                    || line.contains("compress_gpu")
                    || line.contains("cargo")
                    || line.contains("libFuzzer")
            }) {
                eprintln!("{line}");
            }

            let stderr = String::from_utf8_lossy(&output.stderr);
            if !stderr.trim().is_empty() {
                eprintln!("stderr:\n{stderr}");
            }
        }
        Err(err) => eprintln!("failed to run `ps`: {err}"),
    }
}

/// Dumps a snapshot of the current process to stderr: pid, argv, a selection
/// of `/proc/self/status` fields (identity, thread count, memory usage), a
/// `ps` line for this process, and any related processes.
fn log_process_snapshot() {
    let pid = std::process::id();
    eprintln!("pid={pid}");
    eprintln!("argv={:?}", env::args().collect::<Vec<_>>());

    // /proc/self/status only exists on Linux; silently skip elsewhere.
    if let Ok(status) = fs::read_to_string("/proc/self/status") {
        // Fields worth surfacing: process identity, thread count, and the
        // virtual-memory counters most useful for diagnosing memory issues.
        const KEYS: [&str; 10] = [
            "Name", "State", "Pid", "PPid", "Threads", "VmPeak", "VmSize", "VmRSS", "VmData",
            "VmSwap",
        ];
        let interesting: Vec<&str> = status
            .lines()
            .filter(|line| {
                line.split_once(':')
                    .is_some_and(|(key, _)| KEYS.contains(&key))
            })
            .collect();

        if !interesting.is_empty() {
            eprintln!("## /proc/self/status");
            for line in interesting {
                eprintln!("{line}");
            }
        }
    }

    let pid_arg = pid.to_string();
    log_command(
        "current process ps",
        "ps",
        &[
            "-p",
            pid_arg.as_str(),
            "-o",
            "pid,ppid,pgid,rss,vsz,etimes,comm,args",
        ],
    );
    log_relevant_processes(pid);
}

/// Prints a full CUDA/GPU diagnostic report to stderr for the given `phase`
/// (e.g. "startup" or "error"): CUDA availability, a process snapshot, the
/// relevant environment variables, and a series of `nvcc`/`nvidia-smi`
/// queries. Every external command is best-effort via `log_command`.
fn log_cuda_diagnostics(phase: &str) {
    eprintln!("===== compress_gpu CUDA diagnostics ({phase}) =====");
    eprintln!("cuda_available()={}", vortex_cuda::cuda_available());
    log_process_snapshot();

    // Environment variables that control GPU visibility and library lookup.
    for var in ["CUDA_VISIBLE_DEVICES", "NVIDIA_VISIBLE_DEVICES", "LD_LIBRARY_PATH"] {
        let value = env::var(var).unwrap_or_else(|_| "<unset>".to_string());
        eprintln!("{var}={value}");
    }

    log_command("nvcc --version", "nvcc", &["--version"]);
    log_command("nvidia-smi", "nvidia-smi", &[]);
    log_command("nvidia-smi -L", "nvidia-smi", &["-L"]);
    log_command("nvidia-smi memory", "nvidia-smi", &["-q", "-d", "Memory"]);
    log_command(
        "nvidia-smi gpu summary",
        "nvidia-smi",
        &[
            "--query-gpu=index,uuid,name,driver_version,memory.total,memory.used,memory.free,utilization.gpu,temperature.gpu",
            "--format=csv",
        ],
    );
    log_command(
        "nvidia-smi compute processes",
        "nvidia-smi",
        &[
            "--query-compute-apps=gpu_uuid,pid,process_name,used_memory",
            "--format=csv,noheader",
        ],
    );
}

fuzz_target!(|fuzz: FuzzCompressGpu| -> Corpus {
STARTUP_DIAGNOSTICS.get_or_init(|| log_cuda_diagnostics("startup"));

// Use tokio runtime to run async GPU fuzzer
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
Expand All @@ -21,6 +164,7 @@ fuzz_target!(|fuzz: FuzzCompressGpu| -> Corpus {
Ok(true) => Corpus::Keep,
Ok(false) => Corpus::Reject,
Err(e) => {
log_cuda_diagnostics("error");
vortex_panic!("{e}");
}
}
Expand Down
Loading
Loading