From 4e29afd596afe28a0ee601f5a016adafa2745cf0 Mon Sep 17 00:00:00 2001 From: Tien Tong <35613222+tien-tong@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:39:42 -0400 Subject: [PATCH 1/4] add HDF5 benchmark --- README.rst | 13 + .../h5_benchmark_results_gw0.csv | 27 + .../h5_benchmark_results_gw1.csv | 7 + .../h5_benchmark_results_gw10.csv | 19 + .../h5_benchmark_results_gw11.csv | 22 + .../h5_benchmark_results_gw12.csv | 17 + .../h5_benchmark_results_gw13.csv | 11 + .../h5_benchmark_results_gw2.csv | 27 + .../h5_benchmark_results_gw3.csv | 7 + .../h5_benchmark_results_gw4.csv | 18 + .../h5_benchmark_results_gw5.csv | 7 + .../h5_benchmark_results_gw6.csv | 16 + .../h5_benchmark_results_gw7.csv | 7 + .../h5_benchmark_results_gw8.csv | 22 + .../h5_benchmark_results_gw9.csv | 7 + .../plots/chunk_geometry_vs_target_chunk.svg | 825 ++++++ .../plots/chunk_tradeoff_time_and_size.svg | 880 ++++++ .../gzip_level_tradeoff_time_and_size.svg | 782 ++++++ .../plots/h5_benchmark_summary.svg | 2440 +++++++++++++++++ .../plots/pareto_size_vs_time.svg | 912 ++++++ .../plots/scaling_size_vs_inputs.svg | 621 +++++ .../scaling_throughput_mib_vs_inputs.svg | 673 +++++ .../scaling_throughput_values_vs_inputs.svg | 653 +++++ .../plots/scaling_time_vs_inputs.svg | 598 ++++ .../plots/shuffle_effect_summary.svg | 832 ++++++ benchmark_results/run_meta_gw0.json | 13 + benchmark_results/run_meta_gw1.json | 13 + benchmark_results/run_meta_gw10.json | 13 + benchmark_results/run_meta_gw11.json | 13 + benchmark_results/run_meta_gw12.json | 13 + benchmark_results/run_meta_gw13.json | 13 + benchmark_results/run_meta_gw2.json | 13 + benchmark_results/run_meta_gw3.json | 13 + benchmark_results/run_meta_gw4.json | 13 + benchmark_results/run_meta_gw5.json | 13 + benchmark_results/run_meta_gw6.json | 13 + benchmark_results/run_meta_gw7.json | 13 + benchmark_results/run_meta_gw8.json | 13 + benchmark_results/run_meta_gw9.json | 13 + docs/_static/h5_benchmark_summary.svg | 2440 +++++++++++++++++ 
docs/benchmarking.rst | 116 + docs/index.rst | 1 + pyproject.toml | 5 + src/modelarrayio/cli/h5_to_nifti.py | 4 +- test/__init__.py | 1 + test/benchmark_data_utils.py | 383 +++ test/plot_h5_benchmarks.R | 891 ++++++ test/test_h5_benchmarks.py | 403 +++ 48 files changed, 13867 insertions(+), 2 deletions(-) create mode 100644 benchmark_results/h5_benchmark_results_gw0.csv create mode 100644 benchmark_results/h5_benchmark_results_gw1.csv create mode 100644 benchmark_results/h5_benchmark_results_gw10.csv create mode 100644 benchmark_results/h5_benchmark_results_gw11.csv create mode 100644 benchmark_results/h5_benchmark_results_gw12.csv create mode 100644 benchmark_results/h5_benchmark_results_gw13.csv create mode 100644 benchmark_results/h5_benchmark_results_gw2.csv create mode 100644 benchmark_results/h5_benchmark_results_gw3.csv create mode 100644 benchmark_results/h5_benchmark_results_gw4.csv create mode 100644 benchmark_results/h5_benchmark_results_gw5.csv create mode 100644 benchmark_results/h5_benchmark_results_gw6.csv create mode 100644 benchmark_results/h5_benchmark_results_gw7.csv create mode 100644 benchmark_results/h5_benchmark_results_gw8.csv create mode 100644 benchmark_results/h5_benchmark_results_gw9.csv create mode 100644 benchmark_results/plots/chunk_geometry_vs_target_chunk.svg create mode 100644 benchmark_results/plots/chunk_tradeoff_time_and_size.svg create mode 100644 benchmark_results/plots/gzip_level_tradeoff_time_and_size.svg create mode 100644 benchmark_results/plots/h5_benchmark_summary.svg create mode 100644 benchmark_results/plots/pareto_size_vs_time.svg create mode 100644 benchmark_results/plots/scaling_size_vs_inputs.svg create mode 100644 benchmark_results/plots/scaling_throughput_mib_vs_inputs.svg create mode 100644 benchmark_results/plots/scaling_throughput_values_vs_inputs.svg create mode 100644 benchmark_results/plots/scaling_time_vs_inputs.svg create mode 100644 benchmark_results/plots/shuffle_effect_summary.svg create mode 100644 
benchmark_results/run_meta_gw0.json create mode 100644 benchmark_results/run_meta_gw1.json create mode 100644 benchmark_results/run_meta_gw10.json create mode 100644 benchmark_results/run_meta_gw11.json create mode 100644 benchmark_results/run_meta_gw12.json create mode 100644 benchmark_results/run_meta_gw13.json create mode 100644 benchmark_results/run_meta_gw2.json create mode 100644 benchmark_results/run_meta_gw3.json create mode 100644 benchmark_results/run_meta_gw4.json create mode 100644 benchmark_results/run_meta_gw5.json create mode 100644 benchmark_results/run_meta_gw6.json create mode 100644 benchmark_results/run_meta_gw7.json create mode 100644 benchmark_results/run_meta_gw8.json create mode 100644 benchmark_results/run_meta_gw9.json create mode 100644 docs/_static/h5_benchmark_summary.svg create mode 100644 docs/benchmarking.rst create mode 100644 test/__init__.py create mode 100644 test/benchmark_data_utils.py create mode 100644 test/plot_h5_benchmarks.R create mode 100644 test/test_h5_benchmarks.py diff --git a/README.rst b/README.rst index 26bfc12..bb24d55 100644 --- a/README.rst +++ b/README.rst @@ -135,3 +135,16 @@ Notes and minor differences: * Compression: HDF5 uses ``gzip`` by default; TileDB defaults to ``zstd`` with shuffle for better speed/ratio. You can switch to ``gzip`` for parity. * Metadata: HDF5 stores ``column_names`` as a dataset attribute; TileDB stores names as JSON metadata on the array/group. * Layout: Both backends keep dimensions in the same order and use zero-based indices. + +HDF5 Benchmarking and Plots +=========================== + +Benchmark commands, environment setup, output artifacts, and plotting workflow +are documented in the dedicated benchmarking guide: +`docs/benchmarking.rst `_. + +Faceted HDF5 benchmark summary: + +.. 
image:: docs/_static/h5_benchmark_summary.svg + :align: center + :alt: Faceted HDF5 benchmark summary (write time, output size, throughput, and Pareto trade-off) diff --git a/benchmark_results/h5_benchmark_results_gw0.csv b/benchmark_results/h5_benchmark_results_gw0.csv new file mode 100644 index 0000000..f96c870 --- /dev/null +++ b/benchmark_results/h5_benchmark_results_gw0.csv @@ -0,0 +1,27 @@ +timestamp_utc,run_kind,num_input_files,target_chunk_mb,compression,compression_level,shuffle,dtype,num_items,chunk_subjects,chunk_items,elapsed_seconds,data_generation_seconds,hdf5_write_seconds,output_size_bytes,output_size_gb,throughput_values_per_second,throughput_mb_per_second,seed,volume_shape,group_mask_voxels,sampled_voxels,noise_std,dropout_range,mean_missing_fraction,std_missing_fraction,workflow_reference,python_version,h5py_version,platform +2026-03-26T20:45:08.447506+00:00,full,100,4.0,gzip,1,1,float32,65193,100,10485,0.7752895409939811,0.3524996240157634,0.33441041596233845,20200558,0.0188132356852293,8408858.439702094,32.07724929695928,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:09.628760+00:00,full,100,4.0,gzip,4,1,float32,65193,100,10485,1.1801326249842532,0.5468796659260988,0.4793671249644831,19886083,0.0185203580185771,5524209.620157725,21.073187332754994,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:17.354581+00:00,full,100,4.0,gzip,9,1,float32,65193,100,10485,7.724341041001026,0.5281167079810984,5.594763250031974,19690907,0.018338586203753948,843994.3246155714,3.219582842314039,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:31.068509+00:00,full,1000,4.0,gzip,1,1,float32,65193,1000,1048,13.642644957988523,7.314127916994039,6.182141458848491,201407422,0.18757527880370617,4778618.823604722,18.228984159869086,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:43.264232+00:00,full,1000,4.0,gzip,4,1,float32,65193,1000,1048,12.192003750009462,5.213998627092224,6.829684084688779,198349904,0.184727743268013,5347193.237202655,20.39792342072546,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:47:20.579020+00:00,full,1000,4.0,gzip,9,1,float32,65193,1000,1048,97.30141670902958,6.686576583713759,88.71188225591322,196048516,0.18258440867066383,670010.7994825329,2.5558883647252384,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:47:21.355830+00:00,full,100,64.0,lzf,0,1,float32,65193,100,65193,0.7615424580289982,0.6318945839884691,0.12797408294864,22072753,0.020556853152811527,8560652.04410672,32.65629594462097,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T20:47:27.910013+00:00,full,1000,64.0,lzf,0,1,float32,65193,1000,16777,6.5374749160255305,5.3934153750305995,1.132716208056081,220885827,0.20571595709770918,9972198.874551736,38.04091977902121,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:47:33.482936+00:00,full,1000,16.0,none,0,1,float32,65193,1000,4194,5.538095124997199,5.391603540920187,0.13452370808226988,268471120,0.2500332146883011,11771737.12776069,44.905613432924994,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:47:41.204021+00:00,full,1000,32.0,none,0,1,float32,65193,1000,8388,7.667353416967671,7.542786583013367,0.11573054600739852,268471120,0.2500332146883011,8502673.14608577,32.435124000876506,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:48:39.583213+00:00,full,10000,32.0,none,0,1,float32,65193,10000,838,58.33318895800039,56.90574599307729,1.4112978327902965,2615059264,2.4354637265205383,11175970.517733676,42.63294417470427,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:51:42.273055+00:00,full,40000,32.0,none,0,1,float32,65193,40000,209,182.62464779097354,178.9560196630191,3.6437782605644315,10435252768,9.718586474657059,14279124.047837807,54.47053546080706,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:54:34.199290+00:00,full,40000,64.0,none,0,1,float32,65193,40000,419,171.8703353749588,168.71607462427346,3.129484071803745,10460204920,9.741824977099895,15172600.869783023,57.878879050380796,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:54:35.385754+00:00,full,100,64.0,gzip,1,0,float32,65193,100,65193,1.1742297500022687,0.434457458963152,0.738662582996767,21905918,0.020401475951075554,5551979.925553244,21.17912264081285,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:54:36.750221+00:00,full,100,64.0,gzip,4,0,float32,65193,100,65193,1.3629398749908432,0.3822910000453703,0.9797278760233894,21609546,0.020125458016991615,4783263.091516637,18.2467006359735,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:54:38.626968+00:00,full,100,64.0,gzip,9,0,float32,65193,100,65193,1.8754869579570368,0.3570882090134546,1.5172298760153353,21550671,0.02007062640041113,3476057.230012123,13.260106010483257,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T20:58:11.956590+00:00,full,40000,4.0,lzf,0,0,float32,65193,40000,26,213.2749316249974,171.22755819390295,41.98885310429614,9974240591,9.289235427044332,12227034.7486744,46.64243602247009,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:58:17.024204+00:00,full,1000,8.0,lzf,0,0,float32,65193,1000,2097,5.053554375015665,4.005800667102449,1.039998043095693,250446381,0.23324636835604906,12900425.15863855,49.21121657805843,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:58:17.626813+00:00,full,100,32.0,lzf,0,0,float32,65193,100,65193,0.597415332973469,0.5050622500129975,0.0910364169976674,25019722,0.02330143190920353,10912508.668888682,41.627916980318766,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:58:22.495488+00:00,full,1000,32.0,lzf,0,0,float32,65193,1000,8388,4.856998957984615,3.8942452070768923,0.9580536249559373,250389462,0.23319335840642452,13422485.893851515,51.20272023716551,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:59:10.976928+00:00,full,10000,32.0,lzf,0,0,float32,65193,10000,838,48.44871129200328,38.83426175016211,9.597897790023126,2499499280,2.327840104699135,13456085.46883278,51.3308924439727,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:59:11.935094+00:00,full,100,64.0,lzf,0,0,float32,65193,100,65193,0.9099022920127027,0.7496977500268258,0.15542150003602728,25019722,0.02330143190920353,7164835.232560319,27.33167737030151,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:00:03.917279+00:00,full,10000,64.0,lzf,0,0,float32,65193,10000,1677,51.956211083976086,42.050719582883175,9.896713670925237,2499819593,2.3281384194269776,12547681.718866967,47.86560714289462,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:03:39.188496+00:00,full,40000,64.0,lzf,0,0,float32,65193,40000,419,215.21908095903927,173.54901241953485,41.6445949478657,10000394728,9.313593365252018,12116583.661540233,46.221098562394076,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:04:28.073606+00:00,full,10000,8.0,none,0,0,float32,65193,10000,209,48.854627416993026,47.64904895378277,1.1904874539468437,2608829728,2.4296620190143585,13344283.529900389,50.904401893235736,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T21:04:28.706844+00:00,full,100,64.0,none,0,0,float32,65193,100,65193,0.6192203329992481,0.6087561249732971,0.009396708919666708,26089248,0.024297505617141724,10528239.549924335,40.16204662294134,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O diff --git a/benchmark_results/h5_benchmark_results_gw1.csv b/benchmark_results/h5_benchmark_results_gw1.csv new file mode 100644 index 0000000..a493c3d --- /dev/null +++ b/benchmark_results/h5_benchmark_results_gw1.csv @@ -0,0 +1,7 @@ +timestamp_utc,run_kind,num_input_files,target_chunk_mb,compression,compression_level,shuffle,dtype,num_items,chunk_subjects,chunk_items,elapsed_seconds,data_generation_seconds,hdf5_write_seconds,output_size_bytes,output_size_gb,throughput_values_per_second,throughput_mb_per_second,seed,volume_shape,group_mask_voxels,sampled_voxels,noise_std,dropout_range,mean_missing_fraction,std_missing_fraction,workflow_reference,python_version,h5py_version,platform +2026-03-26T20:47:25.095959+00:00,full,10000,4.0,gzip,1,1,float32,65193,10000,104,132.94609783298802,63.04465601610718,69.64874411124038,1998823905,1.8615498254075646,4903716.698920937,18.70619468277335,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:50:09.283023+00:00,full,10000,4.0,gzip,4,1,float32,65193,10000,104,164.17052899999544,75.12272259069141,88.80727381497854,1967025126,1.831934904679656,3971053.781522615,15.148368002024135,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T21:07:46.129246+00:00,full,10000,4.0,gzip,9,1,float32,65193,10000,104,1056.813827999984,70.21516650228295,982.6115624187514,1939212753,1.80603261385113,616882.5413968845,2.353220143878496,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:14:06.791790+00:00,full,40000,4.0,gzip,1,1,float32,65193,40000,26,380.5492545419838,173.2827546976041,207.1372546683997,7950002725,7.40401700604707,6852516.379617044,26.14027549597566,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:21:01.131291+00:00,full,40000,4.0,gzip,4,1,float32,65193,40000,26,414.28676250000717,161.5560295105097,252.5716873544152,7824780727,7.287394932471216,6294480.625602791,24.011538031016507,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T22:00:30.882606+00:00,full,40000,4.0,gzip,9,1,float32,65193,40000,26,2369.712498457986,122.85574224632,2244.3883111422765,7710277062,7.1807550843805075,1100437.2900496959,4.19783512134436,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O diff --git a/benchmark_results/h5_benchmark_results_gw10.csv b/benchmark_results/h5_benchmark_results_gw10.csv new file mode 100644 index 0000000..c55b75b --- /dev/null +++ b/benchmark_results/h5_benchmark_results_gw10.csv @@ -0,0 +1,19 @@ 
+timestamp_utc,run_kind,num_input_files,target_chunk_mb,compression,compression_level,shuffle,dtype,num_items,chunk_subjects,chunk_items,elapsed_seconds,data_generation_seconds,hdf5_write_seconds,output_size_bytes,output_size_gb,throughput_values_per_second,throughput_mb_per_second,seed,volume_shape,group_mask_voxels,sampled_voxels,noise_std,dropout_range,mean_missing_fraction,std_missing_fraction,workflow_reference,python_version,h5py_version,platform +2026-03-26T20:45:13.100511+00:00,full,100,4.0,lzf,0,1,float32,65193,100,10485,0.5930799579946324,0.45983870909549296,0.08846129203448072,22126447,0.02060685958713293,10992278.380209573,41.93221427997426,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:20.316264+00:00,full,1000,4.0,lzf,0,1,float32,65193,1000,1048,7.194398500025272,5.794062499655411,1.3606293289922178,220787129,0.20562403742223978,9061633.158042468,34.56738723008144,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:46:13.690392+00:00,full,10000,16.0,lzf,0,1,float32,65193,10000,419,53.327877416973934,42.338993915123865,10.971396750130225,2204941405,2.0535117061808705,12224938.091994915,46.63443791196791,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:49:43.216172+00:00,full,40000,16.0,lzf,0,1,float32,65193,40000,104,209.48347766703228,164.38189641048666,45.078501051466446,8806369201,8.201570437289774,12448332.579932118,47.48662025425765,20300313,"[61, 73, 
61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:49:47.371785+00:00,full,1000,8.0,none,0,1,float32,65193,1000,2097,4.142962916987017,3.999338338209782,0.1345959628233686,268471120,0.2500332146883011,15735839.6167861,60.02746435846748,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:49:58.764113+00:00,full,1000,8.0,gzip,1,0,float32,65193,1000,2097,11.384380707982928,3.901193918194622,7.4618556260247715,219433315,0.2043631998822093,5726530.205923764,21.84497911805635,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:50:13.548643+00:00,full,1000,8.0,gzip,4,0,float32,65193,1000,2097,14.782380124961492,3.9729808339616284,10.758243003103416,216472434,0.20160566456615925,4410182.896725492,16.823512637044875,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:50:31.973723+00:00,full,1000,8.0,gzip,9,0,float32,65193,1000,2097,18.422743916045874,3.8271494149812497,14.532373125839513,215866830,0.20104165188968182,3538723.6720594093,13.499159515607488,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T20:52:17.954924+00:00,full,10000,8.0,gzip,1,0,float32,65193,10000,209,105.94155716605019,33.88383767119376,71.84810749802273,2184124301,2.0341242672875524,6153675.832593068,23.47441037213542,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:54:42.899887+00:00,full,10000,8.0,gzip,4,0,float32,65193,10000,209,144.92346862499835,35.109704423404764,109.59393520251615,2154254337,2.0063056955114007,4498443.255501451,17.16019918633061,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:57:34.995819+00:00,full,10000,8.0,gzip,9,0,float32,65193,10000,209,172.0736117499764,31.5231515407213,139.9605630094302,2148502815,2.000949173234403,3788669.240855226,14.452626193447975,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:57:46.447463+00:00,full,1000,16.0,gzip,1,0,float32,65193,1000,4194,11.43793266598368,4.22463166777743,7.210249793017283,219368185,0.2043025428429246,5699718.813163104,21.74270177140466,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:58:01.082436+00:00,full,1000,16.0,gzip,4,0,float32,65193,1000,4194,14.632255291973706,4.128421083849389,10.499396789993625,216386311,0.20152545627206564,4455430.73840166,16.996119454962386,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:58:18.884556+00:00,full,1000,16.0,gzip,9,0,float32,65193,1000,4194,17.797879500023555,4.11371545586735,13.6785010821186,215792316,0.20097225531935692,3662964.455957448,13.973100494222443,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:59:25.887348+00:00,full,10000,16.0,lzf,0,0,float32,65193,10000,419,66.97517854202306,52.76101750286762,14.189454341656528,2498705439,2.3271007826551795,9733904.622455789,37.131899347136645,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:03:04.149192+00:00,full,40000,32.0,lzf,0,0,float32,65193,40000,209,218.1732046250254,175.15440791362198,42.99365812452743,9996200927,9.309687583707273,11952521.871243961,45.59525249955735,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:06:27.472902+00:00,full,40000,4.0,none,0,0,float32,65193,40000,26,203.24360574997263,197.10321076319087,6.098745712370146,10435352176,9.718679055571556,12830514.349405805,48.94452800524065,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T21:06:31.990468+00:00,full,1000,32.0,none,0,0,float32,65193,1000,8388,4.498844666988589,4.404839666036423,0.08608587400522083,268471120,0.2500332146883011,14491053.776177278,55.27898321600829,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O diff --git a/benchmark_results/h5_benchmark_results_gw11.csv b/benchmark_results/h5_benchmark_results_gw11.csv new file mode 100644 index 0000000..d5cf1b3 --- /dev/null +++ b/benchmark_results/h5_benchmark_results_gw11.csv @@ -0,0 +1,22 @@ +timestamp_utc,run_kind,num_input_files,target_chunk_mb,compression,compression_level,shuffle,dtype,num_items,chunk_subjects,chunk_items,elapsed_seconds,data_generation_seconds,hdf5_write_seconds,output_size_bytes,output_size_gb,throughput_values_per_second,throughput_mb_per_second,seed,volume_shape,group_mask_voxels,sampled_voxels,noise_std,dropout_range,mean_missing_fraction,std_missing_fraction,workflow_reference,python_version,h5py_version,platform +2026-03-26T20:46:25.374909+00:00,full,10000,4.0,lzf,0,1,float32,65193,10000,104,75.85084587498568,60.82900304417126,14.951506256998982,2201447292,2.050257559865713,8594894.262280013,32.78691964065557,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:51:08.591514+00:00,full,40000,4.0,lzf,0,1,float32,65193,40000,26,283.1481705000042,229.18335936497897,53.862998132186476,8791546101,8.187765349633992,9209736.356039641,35.13235609451157,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T20:53:51.305023+00:00,full,40000,8.0,none,0,1,float32,65193,40000,52,162.654260625015,157.8383224811987,4.781719161081128,10435294624,9.718625456094742,16032288.30268312,61.15832634995697,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:54:02.698028+00:00,full,1000,32.0,gzip,1,0,float32,65193,1000,8388,11.343450999993365,4.253829249180853,7.06057995808078,219382142,0.2043155413120985,5747192.807553727,21.92380068799487,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:54:18.274126+00:00,full,1000,32.0,gzip,4,0,float32,65193,1000,8388,15.57250683294842,4.689151833066717,10.877027375914622,216393544,0.20153219252824783,4186416.5287803365,15.969911685105654,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:54:36.537376+00:00,full,1000,32.0,gzip,9,0,float32,65193,1000,8388,18.259554208023474,4.2419667090289295,14.010787625098601,215802852,0.20098206773400307,3570350.0346877794,13.619804514647596,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:55:28.596749+00:00,full,10000,4.0,lzf,0,0,float32,65193,10000,104,52.02740812499542,41.48106421419652,10.505169531214051,2496395179,2.3249491853639483,12530510.811412007,47.80010532917788,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:55:29.281240+00:00,full,100,8.0,lzf,0,0,float32,65193,100,20971,0.6665804170188494,0.534341624006629,0.11699262488400564,25113822,0.023389069363474846,9780215.31018912,37.3085606010022,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:56:16.717430+00:00,full,10000,8.0,lzf,0,0,float32,65193,10000,209,47.41433041699929,37.5551248700358,9.829901287565008,2497676402,2.326142417266965,13749640.546780048,52.45071619712848,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:59:22.589793+00:00,full,40000,8.0,lzf,0,0,float32,65193,40000,52,185.80946179199964,145.24484524608124,40.515076550422236,9985073269,9.299324144609272,14034376.801107984,53.53689880793756,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:59:23.383843+00:00,full,100,16.0,lzf,0,0,float32,65193,100,41943,0.7767149159917608,0.6677643330185674,0.1077181650325656,25111883,0.023387263529002666,8393427.06799409,32.018383285499915,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T20:59:23.859842+00:00,full,100,4.0,none,0,0,float32,65193,100,10485,0.46724745800020173,0.43769895896548405,0.022781039006076753,29370048,0.027352988719940186,13952563.868195908,53.224807236465104,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:59:28.272006+00:00,full,1000,4.0,none,0,0,float32,65193,1000,1048,4.396907541959081,4.2386207929230295,0.1449304138077423,264151120,0.24600990116596222,14827011.798149543,56.560561363790676,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:00:16.387823+00:00,full,10000,4.0,none,0,0,float32,65193,10000,104,48.06857070798287,46.55484708369477,1.4953492900240235,2608842808,2.4296742007136345,13562500.203313353,51.73683244061795,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:00:16.806105+00:00,full,100,8.0,none,0,0,float32,65193,100,20971,0.4105522919562645,0.3967809990281239,0.010825540986843407,33565648,0.03126044571399689,15879341.384104343,60.57488015786874,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:00:21.117347+00:00,full,1000,16.0,none,0,0,float32,65193,1000,4194,4.303420624986757,4.202055376081262,0.09424596012104303,268471120,0.2500332146883011,15149111.760415152,57.78927520910321,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:01:02.896202+00:00,full,10000,16.0,none,0,0,float32,65193,10000,419,41.75603337504435,40.75960820220644,0.9842475007171743,2615061880,2.4354661628603935,15612833.5789105,59.558233562128066,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:03:57.388584+00:00,full,40000,16.0,none,0,0,float32,65193,40000,104,174.44955237500835,170.71631158335367,3.6991135432035662,10435265848,9.718598656356335,14948275.673383625,57.02314633706522,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:03:57.885450+00:00,full,100,32.0,none,0,0,float32,65193,100,65193,0.48648408299777657,0.47624754201387987,0.008695248980075121,26089248,0.024297505617141724,13400849.540291734,51.120184098402916,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:04:45.113333+00:00,full,10000,64.0,none,0,0,float32,65193,10000,1677,47.21202845900552,46.36709825199796,0.830019292247016,2616614032,2.436911717057228,13808557.295225613,52.67546575632329,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T21:07:30.802913+00:00,full,40000,64.0,none,0,0,float32,65193,40000,419,165.62822529202094,162.58897941571195,3.0143454120261595,10460204920,9.741824977099895,15744417.92998929,60.060188026387365,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O diff --git a/benchmark_results/h5_benchmark_results_gw12.csv b/benchmark_results/h5_benchmark_results_gw12.csv new file mode 100644 index 0000000..8b0146e --- /dev/null +++ b/benchmark_results/h5_benchmark_results_gw12.csv @@ -0,0 +1,17 @@ +timestamp_utc,run_kind,num_input_files,target_chunk_mb,compression,compression_level,shuffle,dtype,num_items,chunk_subjects,chunk_items,elapsed_seconds,data_generation_seconds,hdf5_write_seconds,output_size_bytes,output_size_gb,throughput_values_per_second,throughput_mb_per_second,seed,volume_shape,group_mask_voxels,sampled_voxels,noise_std,dropout_range,mean_missing_fraction,std_missing_fraction,workflow_reference,python_version,h5py_version,platform +2026-03-26T20:45:08.857966+00:00,full,100,8.0,lzf,0,1,float32,65193,100,20971,0.4866867499658838,0.3652613759622909,0.10924499994143844,22170373,0.02064776886254549,13395269.134524405,51.09889653978121,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:13.639134+00:00,full,1000,8.0,lzf,0,1,float32,65193,1000,2097,4.765591999981552,3.657438918831758,1.096213877084665,220901586,0.20573063381016254,13679937.350963399,52.18481960664138,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T20:45:14.219411+00:00,full,100,16.0,lzf,0,1,float32,65193,100,41943,0.5761926249833778,0.454097249021288,0.11856487498153001,22169791,0.020647226832807064,11314445.408231443,43.16118396084382,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:20.348750+00:00,full,1000,16.0,lzf,0,1,float32,65193,1000,4194,6.119177667016629,4.872704623092432,1.2374493329552934,220902621,0.20573159772902727,10653882.522712318,40.64133652768066,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:21.099774+00:00,full,100,32.0,lzf,0,1,float32,65193,100,65193,0.7221405829768628,0.6033592909807339,0.11729441705392674,22072753,0.020556853152811527,9027743.563622538,34.43810868691459,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:27.873139+00:00,full,1000,32.0,lzf,0,1,float32,65193,1000,8388,6.762345416995231,5.488946874975227,1.2633516258792952,220891732,0.20572145655751228,9640590.05861427,36.77593253560742,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:33.961358+00:00,full,1000,4.0,none,0,1,float32,65193,1000,1048,6.070372916001361,5.866790418047458,0.1920432480983436,264151120,0.24600990116596222,10739537.900242139,40.96808586212974,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:46:17.777635+00:00,full,10000,4.0,none,0,1,float32,65193,10000,104,43.7571622080286,42.26750620739767,1.469307481951546,2608842808,2.4296742007136345,14898818.0929243,56.83448064012261,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:50:06.018792+00:00,full,40000,4.0,none,0,1,float32,65193,40000,26,228.19085224997252,220.58197070180904,7.552472730982117,10435352176,9.718679055571556,11427802.535849962,43.59360708560929,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:50:48.063857+00:00,full,10000,8.0,none,0,1,float32,65193,10000,209,42.01335945801111,40.72950633778237,1.2576902461005375,2608829728,2.4296620190143585,15517207.107695121,59.193447523861394,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:58:12.062122+00:00,full,40000,8.0,gzip,1,0,float32,65193,40000,52,443.93223712500185,155.2411122706253,288.5415926784044,8719318288,8.120497956871986,5874139.749093558,22.408064838766318,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T21:08:17.495288+00:00,full,40000,8.0,gzip,4,0,float32,65193,40000,52,605.3633766249986,161.44840477989055,443.72273688408313,8604717142,8.013767322525382,4307693.693890886,16.43254735523562,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:19:14.542955+00:00,full,40000,8.0,gzip,9,0,float32,65193,40000,52,657.0003732499899,137.55711991514545,518.9873244926566,8584917175,7.9953271662816405,3969130.1651783953,15.14102998801573,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:19:15.660368+00:00,full,100,32.0,gzip,1,0,float32,65193,100,65193,1.066246250004042,0.40918704197974876,0.6545640410040505,21905918,0.020401475951075554,6114253.625722282,23.32402658738053,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:19:16.955613+00:00,full,100,32.0,gzip,4,0,float32,65193,100,65193,1.2932369170011953,0.3734803749830462,0.9182971669943072,21609546,0.020125458016991615,5041071.681681644,19.23016235993059,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:19:18.546823+00:00,full,100,32.0,gzip,9,0,float32,65193,100,65193,1.590066166012548,0.3705453329603188,1.2188914999715053,21550671,0.02007062640041113,4100018.0617317487,15.640327689101214,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O diff --git a/benchmark_results/h5_benchmark_results_gw13.csv b/benchmark_results/h5_benchmark_results_gw13.csv new file mode 100644 index 0000000..b5444d8 --- /dev/null +++ b/benchmark_results/h5_benchmark_results_gw13.csv @@ -0,0 +1,11 @@ +timestamp_utc,run_kind,num_input_files,target_chunk_mb,compression,compression_level,shuffle,dtype,num_items,chunk_subjects,chunk_items,elapsed_seconds,data_generation_seconds,hdf5_write_seconds,output_size_bytes,output_size_gb,throughput_values_per_second,throughput_mb_per_second,seed,volume_shape,group_mask_voxels,sampled_voxels,noise_std,dropout_range,mean_missing_fraction,std_missing_fraction,workflow_reference,python_version,h5py_version,platform +2026-03-26T20:45:57.168077+00:00,full,10000,8.0,lzf,0,1,float32,65193,10000,209,48.887907166965306,38.41095545591088,10.435194955673069,2203097094,2.051794057711959,13335199.598000469,50.86974944305599,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:49:28.837180+00:00,full,40000,8.0,lzf,0,1,float32,65193,40000,52,211.61564904195257,163.7230789619498,47.828436769137625,8799706194,8.195365028455853,12322907.17537162,47.008160306440814,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:49:29.468766+00:00,full,100,8.0,none,0,1,float32,65193,100,20971,0.5884755830047652,0.562020749959629,0.02243779198033735,33565648,0.03126044571399689,11078284.61924003,42.26030204483044,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:51:34.011660+00:00,full,10000,4.0,gzip,1,0,float32,65193,10000,104,124.51707829197403,50.264919426466804,74.06270571326604,2181540848,2.0317182391881943,5235667.339313256,19.972485883000395,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:54:15.844966+00:00,full,10000,4.0,gzip,4,0,float32,65193,10000,104,161.8103843339486,50.697387551364955,110.77230075426633,2152104269,2.0043032886460423,4028975.041889335,15.369320075566616,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:57:22.448397+00:00,full,10000,4.0,gzip,9,0,float32,65193,10000,104,186.57912333396962,44.39638928882778,141.59556717507076,2146716537,1.9992855722084641,3494120.823116259,13.329013149704968,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:05:46.487815+00:00,full,40000,4.0,gzip,1,0,float32,65193,40000,26,503.9569528750144,196.40219909878215,307.39574028080096,8699638399,8.10216963198036,5174489.577181678,19.73911124108001,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T21:15:08.330949+00:00,full,40000,4.0,gzip,4,0,float32,65193,40000,26,561.7989925830043,168.11050123505993,393.5089676656062,8588683026,7.998834388330579,4641731.356637697,17.706799913931643,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:25:25.460881+00:00,full,40000,4.0,gzip,9,0,float32,65193,40000,26,617.0838448330178,145.33032905077562,471.3282609746675,8570762666,7.982144752517343,4225876.308114412,16.12043879743352,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:25:29.499511+00:00,full,1000,16.0,lzf,0,0,float32,65193,1000,4194,4.01882791594835,3.1730971669894643,0.8446297501795925,250428104,0.2332293465733528,16221893.886346206,61.88161425150378,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O diff --git a/benchmark_results/h5_benchmark_results_gw2.csv b/benchmark_results/h5_benchmark_results_gw2.csv new file mode 100644 index 0000000..c5f1b68 --- /dev/null +++ b/benchmark_results/h5_benchmark_results_gw2.csv @@ -0,0 +1,27 @@ +timestamp_utc,run_kind,num_input_files,target_chunk_mb,compression,compression_level,shuffle,dtype,num_items,chunk_subjects,chunk_items,elapsed_seconds,data_generation_seconds,hdf5_write_seconds,output_size_bytes,output_size_gb,throughput_values_per_second,throughput_mb_per_second,seed,volume_shape,group_mask_voxels,sampled_voxels,noise_std,dropout_range,mean_missing_fraction,std_missing_fraction,workflow_reference,python_version,h5py_version,platform 
+2026-03-26T20:45:08.513583+00:00,full,100,8.0,gzip,1,1,float32,65193,100,20971,0.7308998330263421,0.294459291966632,0.4139464600593783,20233969,0.01884435210376978,8919553.27586597,34.02539549204243,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:09.537415+00:00,full,100,8.0,gzip,4,1,float32,65193,100,20971,1.022196999983862,0.40653174999170005,0.5645020409720019,19906739,0.018539595417678356,6377733.450697785,24.329122355261937,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:17.717100+00:00,full,100,8.0,gzip,9,1,float32,65193,100,20971,8.177320625050925,0.44374004198471084,7.558465500071179,19711309,0.018357587046921253,797241.5781311499,3.041235268139457,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:27.469420+00:00,full,1000,8.0,gzip,1,1,float32,65193,1000,2097,9.727037834003568,4.648327288974542,5.044876752013806,201621381,0.18777454365044832,6702245.957356075,25.56703932707243,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:38.753719+00:00,full,1000,8.0,gzip,4,1,float32,65193,1000,2097,11.280236167018302,4.342033335997257,6.880105710763019,198577238,0.18493946455419064,5779400.274491986,22.046662424056954,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:47:17.204621+00:00,full,1000,8.0,gzip,9,1,float32,65193,1000,2097,98.44089787499979,5.683668498881161,92.40486641577445,196434969,0.1829443210735917,662255.2354488075,2.5263032358124065,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:48:38.520009+00:00,full,10000,64.0,lzf,0,1,float32,65193,10000,1677,81.2851963750436,67.93829966406338,13.332075039797928,2207670964,2.0560538060963154,8020279.572088938,30.594938553195718,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:48:39.219838+00:00,full,100,16.0,none,0,1,float32,65193,100,41943,0.6642400000127964,0.6410147499991581,0.020275291986763477,33566448,0.03126119077205658,9814675.418334348,37.44001548131694,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:48:47.539302+00:00,full,1000,64.0,none,0,1,float32,65193,1000,16777,8.31025145901367,8.143837000010535,0.1572415839182213,268487120,0.25004811584949493,7844888.968947955,29.92587649897749,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T20:49:39.376124+00:00,full,10000,64.0,none,0,1,float32,65193,10000,1677,51.7969737079693,50.74179624917451,1.0259631660883315,2616614032,2.436911717057228,12586256.557681793,48.01275847504346,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:49:52.204520+00:00,full,1000,4.0,gzip,1,0,float32,65193,1000,1048,12.7948703749571,5.077202123822644,7.56151354231406,219332832,0.2042696177959442,5095245.054424289,19.43681737680164,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:50:07.881376+00:00,full,1000,4.0,gzip,4,0,float32,65193,1000,1048,15.674986083002295,4.907216042978689,10.570755130145699,216365288,0.2015058770775795,4159046.7547970745,15.86550428313093,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:50:27.597471+00:00,full,1000,4.0,gzip,9,0,float32,65193,1000,1048,19.714418666029815,5.070008040522225,14.334120125859044,215761382,0.20094344578683376,3306869.0030578966,12.614704143745028,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:50:28.765635+00:00,full,100,8.0,gzip,1,0,float32,65193,100,20971,1.156631083053071,0.42681749997427687,0.7084555000183173,21964550,0.02045608125627041,5636455.820287571,21.501372605467115,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:50:30.212536+00:00,full,100,8.0,gzip,4,0,float32,65193,100,20971,1.4457777499919757,0.42724741593701765,0.9696776670170948,21652034,0.020165028050541878,4509199.287398207,17.20123019179614,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:50:32.039330+00:00,full,100,8.0,gzip,9,0,float32,65193,100,20971,1.8256148329819553,0.3624419579282403,1.3906996679143049,21593866,0.020110854879021645,3571016.12137506,13.622345433712235,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:50:33.360410+00:00,full,100,16.0,gzip,1,0,float32,65193,100,41943,1.290743499994278,0.5211836249800399,0.7573581660399213,21973038,0.020463986322283745,5050809.862710059,19.267310572471843,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:50:35.159023+00:00,full,100,16.0,gzip,4,0,float32,65193,100,41943,1.79683720797766,0.5390092079760507,1.2554793750168756,21653370,0.020166272297501564,3628208.482691357,13.84051697804015,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T20:50:37.336226+00:00,full,100,16.0,gzip,9,0,float32,65193,100,41943,2.175490790978074,0.49968616699334234,1.6731087090447545,21596179,0.02011300902813673,2996703.101220209,11.43151512611469,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:52:33.619808+00:00,full,10000,16.0,gzip,1,0,float32,65193,10000,419,116.25902450003196,43.7392422928242,72.49753849307308,2186784124,2.036601420491934,5607564.684149064,21.39116166743875,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:55:04.091873+00:00,full,10000,16.0,gzip,4,0,float32,65193,10000,419,150.45809908397496,42.873106455022935,107.57360433711438,2156527297,2.0084225544705987,4332967.144800489,16.528957919313388,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:58:05.545240+00:00,full,10000,16.0,gzip,9,0,float32,65193,10000,419,181.4327741249581,40.74819337588269,140.67611537134508,2150529721,2.0028368765488267,3593231.7253276226,13.707091237364283,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:06:08.099596+00:00,full,40000,16.0,gzip,1,0,float32,65193,40000,104,482.488542334002,175.4104132942739,307.0594882434234,8730453217,8.130868167616427,5404729.379448786,20.61740638522639,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:15:06.802740+00:00,full,40000,16.0,gzip,4,0,float32,65193,40000,104,538.651798292005,152.4689789598342,386.16226643521804,8613461924,8.021911535412073,4841197.984057126,18.46770471213198,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:25:01.684978+00:00,full,40000,16.0,gzip,9,0,float32,65193,40000,104,594.8251796670374,126.85207080096006,467.95665837154957,8592038886,8.001959776505828,4384010.78020892,16.72367393573349,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:27:17.417927+00:00,full,40000,16.0,lzf,0,0,float32,65193,40000,104,135.67503462499008,106.00282738229726,29.65550375042949,9991045724,9.304886426776648,19220337.825656854,73.31977014792196,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O diff --git a/benchmark_results/h5_benchmark_results_gw3.csv b/benchmark_results/h5_benchmark_results_gw3.csv new file mode 100644 index 0000000..d8839ae --- /dev/null +++ b/benchmark_results/h5_benchmark_results_gw3.csv @@ -0,0 +1,7 @@ 
+timestamp_utc,run_kind,num_input_files,target_chunk_mb,compression,compression_level,shuffle,dtype,num_items,chunk_subjects,chunk_items,elapsed_seconds,data_generation_seconds,hdf5_write_seconds,output_size_bytes,output_size_gb,throughput_values_per_second,throughput_mb_per_second,seed,volume_shape,group_mask_voxels,sampled_voxels,noise_std,dropout_range,mean_missing_fraction,std_missing_fraction,workflow_reference,python_version,h5py_version,platform +2026-03-26T20:46:37.257644+00:00,full,10000,8.0,gzip,1,1,float32,65193,10000,209,89.74604662496131,38.25579520693282,51.32694279967109,2002249335,1.8647400056943297,7264163.988463387,27.710586503842876,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:48:40.276567+00:00,full,10000,8.0,gzip,4,1,float32,65193,10000,209,122.98789304099046,48.82158694509417,73.89442267036065,1970697089,1.8353546867147088,5300765.659776928,20.220816268069946,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:06:35.887889+00:00,full,10000,8.0,gzip,9,1,float32,65193,10000,209,1075.5797649999731,60.39143141207751,1009.740117252979,1944118294,1.8106012549251318,606119.621449011,2.3121628625832025,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:12:25.161895+00:00,full,40000,8.0,gzip,1,1,float32,65193,40000,52,349.19259483303176,141.3589519901434,207.70717958605383,7978251734,7.430325945839286,7467855.958534558,28.487609705103143,20300313,"[61, 73, 
61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:18:54.991740+00:00,full,40000,8.0,gzip,4,1,float32,65193,40000,52,389.75544591702055,134.28512714529643,255.30534199345857,7852023884,7.312767099589109,6690656.993552791,25.522830938540615,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:58:15.544792+00:00,full,40000,8.0,gzip,9,1,float32,65193,40000,52,2360.4779739169753,104.04177613643697,2254.053070039605,7737044855,7.205684534274042,1104742.3567663084,4.214257647576555,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O diff --git a/benchmark_results/h5_benchmark_results_gw4.csv b/benchmark_results/h5_benchmark_results_gw4.csv new file mode 100644 index 0000000..95b85bb --- /dev/null +++ b/benchmark_results/h5_benchmark_results_gw4.csv @@ -0,0 +1,18 @@ +timestamp_utc,run_kind,num_input_files,target_chunk_mb,compression,compression_level,shuffle,dtype,num_items,chunk_subjects,chunk_items,elapsed_seconds,data_generation_seconds,hdf5_write_seconds,output_size_bytes,output_size_gb,throughput_values_per_second,throughput_mb_per_second,seed,volume_shape,group_mask_voxels,sampled_voxels,noise_std,dropout_range,mean_missing_fraction,std_missing_fraction,workflow_reference,python_version,h5py_version,platform +2026-03-26T20:45:09.040497+00:00,full,100,16.0,gzip,1,1,float32,65193,100,41943,0.8138096250477247,0.3914600830175914,0.4205720410682261,20243237,0.018852983601391315,8010841.601458922,30.558935552440346,20260413,"[61, 73, 
61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:10.168351+00:00,full,100,16.0,gzip,4,1,float32,65193,100,41943,1.125731625012122,0.504006999952253,0.6203878339729272,19906998,0.01853983663022518,5791167.144238129,22.09154946990253,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:18.081801+00:00,full,100,16.0,gzip,9,1,float32,65193,100,41943,7.910770207992755,0.3832506659673527,7.522163457935676,19724219,0.01836961042135954,824104.3322700913,3.1437085429004337,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:30.860796+00:00,full,1000,16.0,gzip,1,1,float32,65193,1000,4194,12.707583334005903,6.53570683428552,6.162654874904547,201616563,0.18777005653828382,5130243.75181876,19.570326812052766,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:45.625882+00:00,full,1000,16.0,gzip,4,1,float32,65193,1000,4194,14.760562249983195,6.393919789989013,8.358500456961337,198584107,0.18494586180895567,4416701.674089293,16.848379799229786,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T20:47:23.529505+00:00,full,1000,16.0,gzip,9,1,float32,65193,1000,4194,97.8988012910122,5.984879874973558,91.91005287598819,196528417,0.18303135130554438,665922.3518601465,2.54029217475947,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:47:24.273843+00:00,full,100,4.0,none,0,1,float32,65193,100,10485,0.7301809170166962,0.7033679159358144,0.021837416978087276,29370048,0.027352988719940186,8928335.222229494,34.05889595882223,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:48:21.415924+00:00,full,10000,16.0,none,0,1,float32,65193,10000,419,57.09504295804072,55.54277516936418,1.5276798641425557,2615061880,2.4354661628603935,11418329.26685255,43.557469432268334,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:51:25.541143+00:00,full,40000,16.0,none,0,1,float32,65193,40000,104,184.05021754198242,179.8601315288688,4.1656566697056405,10435265848,9.718598656356335,14168524.410492321,54.048631326646124,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:51:26.169760+00:00,full,100,64.0,none,0,1,float32,65193,100,65193,0.6113521250081249,0.5995947089977562,0.010379792016465217,26089248,0.024297505617141724,10663739.820832975,40.67893913586798,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:53:25.560708+00:00,full,10000,32.0,gzip,1,0,float32,65193,10000,838,119.36323779198574,45.06420787982643,74.28772208216833,2189593060,2.039217445999384,5461731.870378031,20.834853631507993,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:55:50.510845+00:00,full,10000,32.0,gzip,4,0,float32,65193,10000,838,144.93180741701508,42.22049512719968,102.69881171209272,2159047572,2.0107697434723377,4498184.433208573,17.159211857637683,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:58:53.140540+00:00,full,10000,32.0,gzip,9,0,float32,65193,10000,838,182.61091874999693,42.40506141912192,140.19354537309846,2152872782,2.005019022151828,3570049.3949790774,13.618657665172872,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:06:59.001551+00:00,full,40000,32.0,gzip,1,0,float32,65193,40000,209,485.7840722500114,180.35346105491044,305.4100448387326,8740866907,8.140566672198474,5368064.0205468135,20.477539140879873,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T21:15:50.793062+00:00,full,40000,32.0,gzip,4,0,float32,65193,40000,209,531.7480210409849,153.10616762185236,378.62351487891283,8622200500,8.030049968510866,4904052.101397492,18.707474141683548,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:25:39.080309+00:00,full,40000,32.0,gzip,9,0,float32,65193,40000,209,588.2507489999989,127.50164549052715,460.72863516415237,8599354922,8.008773365989327,4433007.530263264,16.910581704190307,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:25:42.733945+00:00,full,1000,64.0,lzf,0,0,float32,65193,1000,16777,3.6390581249725074,2.911406250030268,0.7268712930381298,250372961,0.2331779906526208,17914800.412948206,68.33954014949114,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O diff --git a/benchmark_results/h5_benchmark_results_gw5.csv b/benchmark_results/h5_benchmark_results_gw5.csv new file mode 100644 index 0000000..0709855 --- /dev/null +++ b/benchmark_results/h5_benchmark_results_gw5.csv @@ -0,0 +1,7 @@ +timestamp_utc,run_kind,num_input_files,target_chunk_mb,compression,compression_level,shuffle,dtype,num_items,chunk_subjects,chunk_items,elapsed_seconds,data_generation_seconds,hdf5_write_seconds,output_size_bytes,output_size_gb,throughput_values_per_second,throughput_mb_per_second,seed,volume_shape,group_mask_voxels,sampled_voxels,noise_std,dropout_range,mean_missing_fraction,std_missing_fraction,workflow_reference,python_version,h5py_version,platform 
+2026-03-26T20:46:39.674134+00:00,full,10000,16.0,gzip,1,1,float32,65193,10000,419,91.30458858399652,40.54028499900596,50.73747621575603,2006222610,1.868440406396985,7140166.8865771275,27.237575098331938,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:48:47.687694+00:00,full,10000,16.0,gzip,4,1,float32,65193,10000,419,127.98504183301702,54.02569558605319,73.90154024632648,1974936316,1.8393027745187283,5093798.389741339,19.431298788991313,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:06:51.293050+00:00,full,10000,16.0,gzip,9,1,float32,65193,10000,419,1083.571875958005,63.93326387798879,1019.6075335419155,1949705008,1.8158042877912521,601649.059434675,2.295109021891308,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:12:47.762700+00:00,full,40000,16.0,gzip,1,1,float32,65193,40000,104,356.40117749996716,150.60119533207035,205.7792309941724,7993552970,7.444576332345605,7316810.843028823,27.911418315997402,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:19:25.688989+00:00,full,40000,16.0,gzip,4,1,float32,65193,40000,104,397.8905234169797,143.1330763680162,254.7410520609701,7866991466,7.326706746593118,6553863.051589123,25.001003462177746,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:59:14.302827+00:00,full,40000,16.0,gzip,9,1,float32,65193,40000,104,2388.5568772500264,108.28314333868911,2280.258260822855,7755423721,7.2228011870756745,1091755.4548679194,4.16471654841583,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O diff --git a/benchmark_results/h5_benchmark_results_gw6.csv b/benchmark_results/h5_benchmark_results_gw6.csv new file mode 100644 index 0000000..8d6c28e --- /dev/null +++ b/benchmark_results/h5_benchmark_results_gw6.csv @@ -0,0 +1,16 @@ +timestamp_utc,run_kind,num_input_files,target_chunk_mb,compression,compression_level,shuffle,dtype,num_items,chunk_subjects,chunk_items,elapsed_seconds,data_generation_seconds,hdf5_write_seconds,output_size_bytes,output_size_gb,throughput_values_per_second,throughput_mb_per_second,seed,volume_shape,group_mask_voxels,sampled_voxels,noise_std,dropout_range,mean_missing_fraction,std_missing_fraction,workflow_reference,python_version,h5py_version,platform +2026-03-26T20:45:08.303852+00:00,full,100,32.0,gzip,1,1,float32,65193,100,65193,0.7411320839892142,0.3346407079952769,0.40506233304040506,20164389,0.018779550679028034,8796407.740047153,33.55563255328046,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:09.290305+00:00,full,100,32.0,gzip,4,1,float32,65193,100,65193,0.984869499981869,0.4268712500343099,0.5571786250220612,19855617,0.018491984345018864,6619455.6741984775,25.251219460290823,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:17.177529+00:00,full,100,32.0,gzip,9,1,float32,65193,100,65193,7.88440083299065,0.3745300000300631,7.507821208972018,19661586,0.018311278894543648,826860.5488347742,3.1542226746932,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:29.988204+00:00,full,1000,32.0,gzip,1,1,float32,65193,1000,8388,12.76059795904439,6.132901040953584,6.61938437592471,201710077,0.18785714823752642,5108929.864355835,19.489020783828106,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:44.855396+00:00,full,1000,32.0,gzip,4,1,float32,65193,1000,8388,14.863922916003503,6.5123888330417685,8.34592716593761,198668174,0.18502415530383587,4385988.838102007,16.731219627769498,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:47:25.297143+00:00,full,1000,32.0,gzip,9,1,float32,65193,1000,8388,100.4374088330078,6.097051959077362,94.33057649905095,196689373,0.18318125326186419,649090.8194216072,2.4760849739898956,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T20:48:41.836830+00:00,full,10000,32.0,lzf,0,1,float32,65193,10000,838,76.48172666697064,63.097721415048,13.360645543027204,2206697097,2.0551468217745423,8523996.886717021,32.516467615955435,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:52:25.250738+00:00,full,40000,32.0,lzf,0,1,float32,65193,40000,209,223.30224954203004,179.51982987241354,43.75883662304841,8812989985,8.207736522890627,11677983.564196803,44.547971970355235,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:52:26.630756+00:00,full,100,4.0,gzip,1,0,float32,65193,100,10485,1.361830750014633,0.5033829590538517,0.6134545409004204,21953222,0.020445531234145164,4787158.756644282,18.261561419083716,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:52:28.915318+00:00,full,100,4.0,gzip,4,0,float32,65193,100,10485,2.2829006660031155,0.8119959169998765,1.173248206905555,21653958,0.020166819915175438,2855709.0096320044,10.893665350463884,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:52:31.088741+00:00,full,100,4.0,gzip,9,0,float32,65193,100,10485,2.1675893750507385,0.5344155839993618,1.3238523759646341,21596216,0.02011304348707199,3007626.8480728264,11.473185913363748,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:00:10.060764+00:00,full,40000,64.0,gzip,1,0,float32,65193,40000,419,458.90289212501375,175.02904182416387,283.8349640731467,8751514993,8.150483475066721,5682509.40394947,21.677053085134393,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:10:20.149500+00:00,full,40000,64.0,gzip,4,0,float32,65193,40000,419,610.0466752090142,179.94943899440113,430.07990080019226,8631323114,8.03854606486857,4274623.739415583,16.306395490324338,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:21:13.431860+00:00,full,40000,64.0,gzip,9,0,float32,65193,40000,419,653.2482826249907,151.68891054595588,501.5430964518455,8607509250,8.016367675736547,3991927.831055639,15.227996181700282,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:21:13.856878+00:00,full,100,4.0,lzf,0,0,float32,65193,100,10485,0.41429133398924023,0.32686083303997293,0.0653255830402486,25071069,0.02334925252944231,15736027.923213368,60.02818269048068,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O diff --git a/benchmark_results/h5_benchmark_results_gw7.csv 
b/benchmark_results/h5_benchmark_results_gw7.csv new file mode 100644 index 0000000..816e0df --- /dev/null +++ b/benchmark_results/h5_benchmark_results_gw7.csv @@ -0,0 +1,7 @@ +timestamp_utc,run_kind,num_input_files,target_chunk_mb,compression,compression_level,shuffle,dtype,num_items,chunk_subjects,chunk_items,elapsed_seconds,data_generation_seconds,hdf5_write_seconds,output_size_bytes,output_size_gb,throughput_values_per_second,throughput_mb_per_second,seed,volume_shape,group_mask_voxels,sampled_voxels,noise_std,dropout_range,mean_missing_fraction,std_missing_fraction,workflow_reference,python_version,h5py_version,platform +2026-03-26T20:46:48.989913+00:00,full,10000,32.0,gzip,1,1,float32,65193,10000,838,96.46690704103094,44.41522458131658,52.041019165946636,2010375789,1.872308355756104,6758068.854873829,25.77998678159267,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:49:22.011637+00:00,full,10000,32.0,gzip,4,1,float32,65193,10000,838,152.99875654204516,64.9959970380296,87.99178462906275,1979480335,1.84353472199291,4261015.0221766345,16.254482353884256,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:07:29.855345+00:00,full,10000,32.0,gzip,9,1,float32,65193,10000,838,1087.8170113749802,63.994120465184096,1023.8017894591903,1955772401,1.821454987861216,599301.162955682,2.2861525076129228,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T21:13:22.249580+00:00,full,40000,32.0,gzip,1,1,float32,65193,40000,209,352.31993733299896,150.58089128916617,201.72167353983968,8009090319,7.459046616218984,7401568.074006796,28.234741493251022,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:20:01.804541+00:00,full,40000,32.0,gzip,4,1,float32,65193,40000,209,399.5155397920171,147.4598252462456,252.037675581465,7883274973,7.341871944256127,6527205.428248291,24.899312699311412,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T22:00:19.596627+00:00,full,40000,32.0,gzip,9,1,float32,65193,40000,209,2417.7483577500097,112.18474166665692,2305.546731794311,7776917027,7.242818388156593,1078573.786077051,4.11443247252293,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O diff --git a/benchmark_results/h5_benchmark_results_gw8.csv b/benchmark_results/h5_benchmark_results_gw8.csv new file mode 100644 index 0000000..cd8d279 --- /dev/null +++ b/benchmark_results/h5_benchmark_results_gw8.csv @@ -0,0 +1,22 @@ +timestamp_utc,run_kind,num_input_files,target_chunk_mb,compression,compression_level,shuffle,dtype,num_items,chunk_subjects,chunk_items,elapsed_seconds,data_generation_seconds,hdf5_write_seconds,output_size_bytes,output_size_gb,throughput_values_per_second,throughput_mb_per_second,seed,volume_shape,group_mask_voxels,sampled_voxels,noise_std,dropout_range,mean_missing_fraction,std_missing_fraction,workflow_reference,python_version,h5py_version,platform 
+2026-03-26T20:45:08.658919+00:00,full,100,64.0,gzip,1,1,float32,65193,100,65193,0.747277291957289,0.32121679198462516,0.4246206660172902,20164389,0.018779550679028034,8724070.796965437,33.27968901430297,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:09.665216+00:00,full,100,64.0,gzip,4,1,float32,65193,100,65193,1.001397791027557,0.4043362920056097,0.594245915999636,19855617,0.018491984345018864,6510200.100711624,24.834442522856232,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:17.927939+00:00,full,100,64.0,gzip,9,1,float32,65193,100,65193,8.255037874972913,0.41636504197958857,7.833865707973018,19661586,0.018311278894543648,789735.9283795401,3.0126034865552525,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:33.095225+00:00,full,1000,64.0,gzip,1,1,float32,65193,1000,16777,15.117081832955591,7.8975342500489205,7.208116541907657,202049109,0.18817289639264345,4312538.671179099,16.45102947684898,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:45:46.458898+00:00,full,1000,64.0,gzip,4,1,float32,65193,1000,16777,13.350839999970049,5.629151292028837,7.711936582927592,198988521,0.1853225016966462,4883063.537586118,18.627409124702904,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:47:26.718097+00:00,full,1000,64.0,gzip,9,1,float32,65193,1000,16777,100.25242537498707,5.884701040980872,94.35773983201943,197061134,0.1835274826735258,650288.5067982168,2.480653788750522,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:51:44.619019+00:00,full,40000,64.0,lzf,0,1,float32,65193,40000,419,257.8160614580265,211.2465300792246,46.53048778377706,8820350185,8.214591243304312,10114653.00203784,38.584339149619446,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:51:45.097427+00:00,full,100,32.0,none,0,1,float32,65193,100,65193,0.4587100410135463,0.4487578329863027,0.008558792003896087,26089248,0.024297505617141724,14212246.118692389,54.215416407365375,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:51:57.042997+00:00,full,1000,64.0,gzip,1,0,float32,65193,1000,16777,11.937259332975373,4.788569959055167,7.140997290902305,219393913,0.2043265039101243,5461303.820376213,20.833220750336505,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T20:52:11.643074+00:00,full,1000,64.0,gzip,4,0,float32,65193,1000,16777,14.59273066703463,4.497025999997277,10.084027874050662,216413120,0.2015504240989685,4467498.337872619,17.04215369366691,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:52:32.028871+00:00,full,1000,64.0,gzip,9,0,float32,65193,1000,16777,20.37879087496549,5.076363208936527,15.296333458041772,215820531,0.2009985325857997,3199061.239697343,12.203450163640378,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:54:33.137620+00:00,full,10000,64.0,gzip,1,0,float32,65193,10000,1677,121.01008466695203,46.69676108582644,74.30091495701345,2191289630,2.0407974999397993,5387402.23010556,20.551308556005708,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:56:53.350550+00:00,full,10000,64.0,gzip,4,0,float32,65193,10000,1677,140.20516700000735,40.8736412487342,99.31077278894372,2161301552,2.0128689259290695,4649828.632920253,17.73768857162572,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:00:01.555680+00:00,full,10000,64.0,gzip,9,0,float32,65193,10000,1677,188.19406299997354,45.29866054083686,142.86906837008428,2155088061,2.0070821614935994,3464136.9106319346,13.214633600738276,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:00:07.891191+00:00,full,1000,4.0,lzf,0,0,float32,65193,1000,1048,6.304808082990348,5.1197471266495995,1.159005162131507,250388771,0.23319271486252546,10340203.721011471,39.44474686054791,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:00:12.779253+00:00,full,1000,8.0,none,0,0,float32,65193,1000,2097,4.8665980829973705,4.726950370008126,0.128287247964181,268471120,0.2500332146883011,13396010.701555036,51.101725393505234,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:03:09.107466+00:00,full,40000,8.0,none,0,0,float32,65193,40000,52,176.2658677920117,171.63587032729993,4.597294671169948,10435294624,9.718625456094742,14794242.542050337,56.43555657215247,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:03:09.658077+00:00,full,100,16.0,none,0,0,float32,65193,100,41943,0.5383595839957707,0.5251967079821043,0.012282082985620946,33566448,0.03126119077205658,12109564.302009743,46.19432183078668,20260413,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08214424861564894,0.0314140992773477,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O 
+2026-03-26T21:03:56.462934+00:00,full,10000,32.0,none,0,0,float32,65193,10000,838,46.77680712501751,45.861199123901315,0.9012692503165454,2615059264,2.4354637265205383,13937035.040840358,53.16556946121352,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:06:52.515148+00:00,full,40000,32.0,none,0,0,float32,65193,40000,209,176.00219512498006,172.54507853923133,3.435288210923318,10435252768,9.718586474657059,14816406.11441377,56.52010389104374,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:06:56.971986+00:00,full,1000,64.0,none,0,0,float32,65193,1000,16777,4.441291374969296,4.3463043319643475,0.08662041794741526,268487120,0.25004811584949493,14678838.764648873,55.99532609805631,20261313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.0819870078075867,0.03067065484951055,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O diff --git a/benchmark_results/h5_benchmark_results_gw9.csv b/benchmark_results/h5_benchmark_results_gw9.csv new file mode 100644 index 0000000..dc0cfb0 --- /dev/null +++ b/benchmark_results/h5_benchmark_results_gw9.csv @@ -0,0 +1,7 @@ +timestamp_utc,run_kind,num_input_files,target_chunk_mb,compression,compression_level,shuffle,dtype,num_items,chunk_subjects,chunk_items,elapsed_seconds,data_generation_seconds,hdf5_write_seconds,output_size_bytes,output_size_gb,throughput_values_per_second,throughput_mb_per_second,seed,volume_shape,group_mask_voxels,sampled_voxels,noise_std,dropout_range,mean_missing_fraction,std_missing_fraction,workflow_reference,python_version,h5py_version,platform 
+2026-03-26T20:46:43.782403+00:00,full,10000,64.0,gzip,1,1,float32,65193,10000,1677,94.87693920900347,44.25927141378634,50.605983917892445,2013605182,1.8753159623593092,6871322.003378185,26.21201325751566,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T20:49:20.479381+00:00,full,10000,64.0,gzip,4,1,float32,65193,10000,1677,156.67791283299448,70.05660621309653,86.59645204013214,1983220356,1.8470178879797459,4160956.628870227,15.872789874535473,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:07:46.163659+00:00,full,10000,64.0,gzip,9,1,float32,65193,10000,1677,1105.6567428749986,68.48789771093288,1037.158696042141,1961069611,1.826388399116695,589631.4603977455,2.249265519705755,20270313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08383643335940977,0.030253493510486312,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:13:38.674718+00:00,full,40000,64.0,gzip,1,1,float32,65193,40000,419,352.45393891702406,152.77298612793675,199.6696622891468,8025649427,7.47446848731488,7398754.027299773,28.2240067569724,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T21:20:18.780261+00:00,full,40000,64.0,gzip,4,1,float32,65193,40000,419,400.07442316698143,148.75089216290507,251.3099567850004,7901120169,7.358491578139365,6518087.258258947,24.86452964118556,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 
0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O +2026-03-26T22:00:57.317212+00:00,full,40000,64.0,gzip,9,1,float32,65193,40000,419,2438.502251375001,114.17161474452587,2324.316934789298,7800217914,7.264519030228257,1069394.1326195544,4.079414873579233,20300313,"[61, 73, 61]",65193,65193,0.35,"[0.01, 0.08]",0.08311344354455233,0.0302910410854366,Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation,3.13.12,3.16.0,macOS-26.3.1-arm64-arm-64bit-Mach-O diff --git a/benchmark_results/plots/chunk_geometry_vs_target_chunk.svg b/benchmark_results/plots/chunk_geometry_vs_target_chunk.svg new file mode 100644 index 0000000..8b909a9 --- /dev/null +++ b/benchmark_results/plots/chunk_geometry_vs_target_chunk.svg @@ -0,0 +1,825 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/benchmark_results/plots/chunk_tradeoff_time_and_size.svg b/benchmark_results/plots/chunk_tradeoff_time_and_size.svg new file mode 100644 index 0000000..b8319c5 --- /dev/null +++ b/benchmark_results/plots/chunk_tradeoff_time_and_size.svg @@ -0,0 +1,880 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/benchmark_results/plots/gzip_level_tradeoff_time_and_size.svg b/benchmark_results/plots/gzip_level_tradeoff_time_and_size.svg new file mode 100644 index 0000000..bb8ad7a --- /dev/null +++ b/benchmark_results/plots/gzip_level_tradeoff_time_and_size.svg @@ -0,0 +1,782 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/benchmark_results/plots/h5_benchmark_summary.svg b/benchmark_results/plots/h5_benchmark_summary.svg new file mode 100644 index 0000000..afb8c56 --- /dev/null +++ b/benchmark_results/plots/h5_benchmark_summary.svg @@ -0,0 +1,2440 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/benchmark_results/plots/pareto_size_vs_time.svg b/benchmark_results/plots/pareto_size_vs_time.svg new file mode 100644 index 0000000..665c64d --- /dev/null +++ b/benchmark_results/plots/pareto_size_vs_time.svg @@ -0,0 +1,912 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/benchmark_results/plots/scaling_size_vs_inputs.svg b/benchmark_results/plots/scaling_size_vs_inputs.svg new file mode 100644 index 0000000..5aea507 --- /dev/null +++ b/benchmark_results/plots/scaling_size_vs_inputs.svg @@ -0,0 +1,621 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/benchmark_results/plots/scaling_throughput_mib_vs_inputs.svg b/benchmark_results/plots/scaling_throughput_mib_vs_inputs.svg new file mode 100644 index 0000000..ec443c7 --- /dev/null +++ b/benchmark_results/plots/scaling_throughput_mib_vs_inputs.svg @@ -0,0 +1,673 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/benchmark_results/plots/scaling_throughput_values_vs_inputs.svg b/benchmark_results/plots/scaling_throughput_values_vs_inputs.svg new file mode 100644 index 0000000..c55c71c --- /dev/null +++ b/benchmark_results/plots/scaling_throughput_values_vs_inputs.svg @@ -0,0 +1,653 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/benchmark_results/plots/scaling_time_vs_inputs.svg b/benchmark_results/plots/scaling_time_vs_inputs.svg new file mode 100644 index 0000000..20818ad --- /dev/null +++ b/benchmark_results/plots/scaling_time_vs_inputs.svg @@ -0,0 +1,598 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/benchmark_results/plots/shuffle_effect_summary.svg b/benchmark_results/plots/shuffle_effect_summary.svg new file mode 100644 index 0000000..c784175 --- /dev/null +++ b/benchmark_results/plots/shuffle_effect_summary.svg 
@@ -0,0 +1,832 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/benchmark_results/run_meta_gw0.json b/benchmark_results/run_meta_gw0.json new file mode 100644 index 0000000..993559e --- /dev/null +++ b/benchmark_results/run_meta_gw0.json @@ -0,0 +1,13 @@ +{ + "commands": { + "full": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m 
benchmark_full test/test_h5_benchmarks.py -q", + "medium": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q", + "parallel": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q", + "plot": "Rscript test/plot_h5_benchmarks.R", + "quick": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q" + }, + "csv_path": "/Users/tientong/projects/ModelArrayIO/benchmark_results/h5_benchmark_results_gw0.csv", + "last_run_kind": "full", + "plots_dir": "/Users/tientong/projects/ModelArrayIO/benchmark_results/plots", + "updated_utc": "2026-03-26T21:04:28.708136+00:00" +} \ No newline at end of file diff --git a/benchmark_results/run_meta_gw1.json b/benchmark_results/run_meta_gw1.json new file mode 100644 index 0000000..8ec3ac7 --- /dev/null +++ b/benchmark_results/run_meta_gw1.json @@ -0,0 +1,13 @@ +{ + "commands": { + "full": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_full test/test_h5_benchmarks.py -q", + "medium": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q", + "parallel": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q", + "plot": "Rscript test/plot_h5_benchmarks.R", + "quick": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q" + }, + "csv_path": "/Users/tientong/projects/ModelArrayIO/benchmark_results/h5_benchmark_results_gw1.csv", + "last_run_kind": "full", + "plots_dir": "/Users/tientong/projects/ModelArrayIO/benchmark_results/plots", + "updated_utc": "2026-03-26T22:00:30.886036+00:00" +} \ No newline at end of file diff --git 
a/benchmark_results/run_meta_gw10.json b/benchmark_results/run_meta_gw10.json new file mode 100644 index 0000000..0edf984 --- /dev/null +++ b/benchmark_results/run_meta_gw10.json @@ -0,0 +1,13 @@ +{ + "commands": { + "full": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_full test/test_h5_benchmarks.py -q", + "medium": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q", + "parallel": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q", + "plot": "Rscript test/plot_h5_benchmarks.R", + "quick": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q" + }, + "csv_path": "/Users/tientong/projects/ModelArrayIO/benchmark_results/h5_benchmark_results_gw10.csv", + "last_run_kind": "full", + "plots_dir": "/Users/tientong/projects/ModelArrayIO/benchmark_results/plots", + "updated_utc": "2026-03-26T21:06:31.992554+00:00" +} \ No newline at end of file diff --git a/benchmark_results/run_meta_gw11.json b/benchmark_results/run_meta_gw11.json new file mode 100644 index 0000000..433bb7f --- /dev/null +++ b/benchmark_results/run_meta_gw11.json @@ -0,0 +1,13 @@ +{ + "commands": { + "full": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_full test/test_h5_benchmarks.py -q", + "medium": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q", + "parallel": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q", + "plot": "Rscript test/plot_h5_benchmarks.R", + "quick": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q" + }, + "csv_path": 
"/Users/tientong/projects/ModelArrayIO/benchmark_results/h5_benchmark_results_gw11.csv", + "last_run_kind": "full", + "plots_dir": "/Users/tientong/projects/ModelArrayIO/benchmark_results/plots", + "updated_utc": "2026-03-26T21:07:30.806494+00:00" +} \ No newline at end of file diff --git a/benchmark_results/run_meta_gw12.json b/benchmark_results/run_meta_gw12.json new file mode 100644 index 0000000..ca71484 --- /dev/null +++ b/benchmark_results/run_meta_gw12.json @@ -0,0 +1,13 @@ +{ + "commands": { + "full": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_full test/test_h5_benchmarks.py -q", + "medium": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q", + "parallel": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q", + "plot": "Rscript test/plot_h5_benchmarks.R", + "quick": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q" + }, + "csv_path": "/Users/tientong/projects/ModelArrayIO/benchmark_results/h5_benchmark_results_gw12.csv", + "last_run_kind": "full", + "plots_dir": "/Users/tientong/projects/ModelArrayIO/benchmark_results/plots", + "updated_utc": "2026-03-26T21:19:18.547934+00:00" +} \ No newline at end of file diff --git a/benchmark_results/run_meta_gw13.json b/benchmark_results/run_meta_gw13.json new file mode 100644 index 0000000..645858a --- /dev/null +++ b/benchmark_results/run_meta_gw13.json @@ -0,0 +1,13 @@ +{ + "commands": { + "full": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_full test/test_h5_benchmarks.py -q", + "medium": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q", + "parallel": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 
MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q", + "plot": "Rscript test/plot_h5_benchmarks.R", + "quick": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q" + }, + "csv_path": "/Users/tientong/projects/ModelArrayIO/benchmark_results/h5_benchmark_results_gw13.csv", + "last_run_kind": "full", + "plots_dir": "/Users/tientong/projects/ModelArrayIO/benchmark_results/plots", + "updated_utc": "2026-03-26T21:25:29.500762+00:00" +} \ No newline at end of file diff --git a/benchmark_results/run_meta_gw2.json b/benchmark_results/run_meta_gw2.json new file mode 100644 index 0000000..cae008e --- /dev/null +++ b/benchmark_results/run_meta_gw2.json @@ -0,0 +1,13 @@ +{ + "commands": { + "full": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_full test/test_h5_benchmarks.py -q", + "medium": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q", + "parallel": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q", + "plot": "Rscript test/plot_h5_benchmarks.R", + "quick": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q" + }, + "csv_path": "/Users/tientong/projects/ModelArrayIO/benchmark_results/h5_benchmark_results_gw2.csv", + "last_run_kind": "full", + "plots_dir": "/Users/tientong/projects/ModelArrayIO/benchmark_results/plots", + "updated_utc": "2026-03-26T21:27:17.420957+00:00" +} \ No newline at end of file diff --git a/benchmark_results/run_meta_gw3.json b/benchmark_results/run_meta_gw3.json new file mode 100644 index 0000000..bce7eb9 --- /dev/null +++ b/benchmark_results/run_meta_gw3.json @@ -0,0 +1,13 @@ +{ + "commands": { + "full": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 
MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_full test/test_h5_benchmarks.py -q", + "medium": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q", + "parallel": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q", + "plot": "Rscript test/plot_h5_benchmarks.R", + "quick": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q" + }, + "csv_path": "/Users/tientong/projects/ModelArrayIO/benchmark_results/h5_benchmark_results_gw3.csv", + "last_run_kind": "full", + "plots_dir": "/Users/tientong/projects/ModelArrayIO/benchmark_results/plots", + "updated_utc": "2026-03-26T21:58:15.546992+00:00" +} \ No newline at end of file diff --git a/benchmark_results/run_meta_gw4.json b/benchmark_results/run_meta_gw4.json new file mode 100644 index 0000000..b4724be --- /dev/null +++ b/benchmark_results/run_meta_gw4.json @@ -0,0 +1,13 @@ +{ + "commands": { + "full": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_full test/test_h5_benchmarks.py -q", + "medium": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q", + "parallel": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q", + "plot": "Rscript test/plot_h5_benchmarks.R", + "quick": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q" + }, + "csv_path": "/Users/tientong/projects/ModelArrayIO/benchmark_results/h5_benchmark_results_gw4.csv", + "last_run_kind": "full", + "plots_dir": "/Users/tientong/projects/ModelArrayIO/benchmark_results/plots", + "updated_utc": "2026-03-26T21:25:42.735394+00:00" +} \ No newline 
at end of file diff --git a/benchmark_results/run_meta_gw5.json b/benchmark_results/run_meta_gw5.json new file mode 100644 index 0000000..3565b42 --- /dev/null +++ b/benchmark_results/run_meta_gw5.json @@ -0,0 +1,13 @@ +{ + "commands": { + "full": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_full test/test_h5_benchmarks.py -q", + "medium": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q", + "parallel": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q", + "plot": "Rscript test/plot_h5_benchmarks.R", + "quick": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q" + }, + "csv_path": "/Users/tientong/projects/ModelArrayIO/benchmark_results/h5_benchmark_results_gw5.csv", + "last_run_kind": "full", + "plots_dir": "/Users/tientong/projects/ModelArrayIO/benchmark_results/plots", + "updated_utc": "2026-03-26T21:59:14.306858+00:00" +} \ No newline at end of file diff --git a/benchmark_results/run_meta_gw6.json b/benchmark_results/run_meta_gw6.json new file mode 100644 index 0000000..cdc01f5 --- /dev/null +++ b/benchmark_results/run_meta_gw6.json @@ -0,0 +1,13 @@ +{ + "commands": { + "full": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_full test/test_h5_benchmarks.py -q", + "medium": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q", + "parallel": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q", + "plot": "Rscript test/plot_h5_benchmarks.R", + "quick": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q" + 
}, + "csv_path": "/Users/tientong/projects/ModelArrayIO/benchmark_results/h5_benchmark_results_gw6.csv", + "last_run_kind": "full", + "plots_dir": "/Users/tientong/projects/ModelArrayIO/benchmark_results/plots", + "updated_utc": "2026-03-26T21:21:13.858003+00:00" +} \ No newline at end of file diff --git a/benchmark_results/run_meta_gw7.json b/benchmark_results/run_meta_gw7.json new file mode 100644 index 0000000..ab518c5 --- /dev/null +++ b/benchmark_results/run_meta_gw7.json @@ -0,0 +1,13 @@ +{ + "commands": { + "full": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_full test/test_h5_benchmarks.py -q", + "medium": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q", + "parallel": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q", + "plot": "Rscript test/plot_h5_benchmarks.R", + "quick": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q" + }, + "csv_path": "/Users/tientong/projects/ModelArrayIO/benchmark_results/h5_benchmark_results_gw7.csv", + "last_run_kind": "full", + "plots_dir": "/Users/tientong/projects/ModelArrayIO/benchmark_results/plots", + "updated_utc": "2026-03-26T22:00:19.598245+00:00" +} \ No newline at end of file diff --git a/benchmark_results/run_meta_gw8.json b/benchmark_results/run_meta_gw8.json new file mode 100644 index 0000000..9e95c2f --- /dev/null +++ b/benchmark_results/run_meta_gw8.json @@ -0,0 +1,13 @@ +{ + "commands": { + "full": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_full test/test_h5_benchmarks.py -q", + "medium": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q", + "parallel": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 
MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q", + "plot": "Rscript test/plot_h5_benchmarks.R", + "quick": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q" + }, + "csv_path": "/Users/tientong/projects/ModelArrayIO/benchmark_results/h5_benchmark_results_gw8.csv", + "last_run_kind": "full", + "plots_dir": "/Users/tientong/projects/ModelArrayIO/benchmark_results/plots", + "updated_utc": "2026-03-26T21:06:56.973858+00:00" +} \ No newline at end of file diff --git a/benchmark_results/run_meta_gw9.json b/benchmark_results/run_meta_gw9.json new file mode 100644 index 0000000..e10ea69 --- /dev/null +++ b/benchmark_results/run_meta_gw9.json @@ -0,0 +1,13 @@ +{ + "commands": { + "full": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_full test/test_h5_benchmarks.py -q", + "medium": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q", + "parallel": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q", + "plot": "Rscript test/plot_h5_benchmarks.R", + "quick": "PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q" + }, + "csv_path": "/Users/tientong/projects/ModelArrayIO/benchmark_results/h5_benchmark_results_gw9.csv", + "last_run_kind": "full", + "plots_dir": "/Users/tientong/projects/ModelArrayIO/benchmark_results/plots", + "updated_utc": "2026-03-26T22:00:57.320600+00:00" +} \ No newline at end of file diff --git a/docs/_static/h5_benchmark_summary.svg b/docs/_static/h5_benchmark_summary.svg new file mode 100644 index 0000000..afb8c56 --- /dev/null +++ b/docs/_static/h5_benchmark_summary.svg @@ -0,0 +1,2440 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/benchmarking.rst b/docs/benchmarking.rst new file mode 100644 index 0000000..ebf3958 --- /dev/null +++ b/docs/benchmarking.rst @@ -0,0 +1,116 @@ +############ +Benchmarking +############ + +HDF5 Benchmarking and Plots +=========================== + +The repository includes a pytest-based benchmark suite for HDF5 write settings +with persisted artifacts and diagnostic plots. + +The benchmarks use open ABIDE ALFF + func-mask volumes from S3 as templates +(``test/test_voxels_s3.py`` dataset) and then add seeded, data-adaptive +variation based on per-voxel mean/SD estimated from the downloaded data. +For a fixed benchmark row seed, generated values are deterministic and +independent of chunk/stripe geometry, so chunk-size comparisons are +apples-to-apples. +Within a given cohort size (``num_input_files``), benchmark rows now reuse the +same deterministic seed across chunk/compression/shuffle settings so storage +trade-offs compare identical synthetic values. 
+ +Quick benchmark subset (small + fast) +------------------------------------- + +Runs an expanded mini-grid (chunk size, compression, gzip level, shuffle, and +``num_input_files`` at 100 and 1000) for fast local comparison. +Because template data are loaded from public S3, keep internet access enabled +when running these benchmarks. Since the ABIDE bucket is public, set +``MODELARRAYIO_S3_ANON=1`` to use unsigned S3 +requests (no AWS credentials required). +All benchmark runs use the full real group-mask voxel set (no voxel downsampling) +so file size and throughput trends are directly comparable across run kinds. +Benchmarks now fail fast if S3 templates are unavailable, instead of silently +falling back to a tiny local synthetic mask. + +.. code-block:: console + + # From repository root + MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_quick test/test_h5_benchmarks.py -q + +Medium benchmark sweep (exclude largest cohort) +------------------------------------------------ + +.. code-block:: console + + # Includes cohort sizes: 100, 1000, 10000 + MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_medium test/test_h5_benchmarks.py -q + +Full benchmark sweep +-------------------- + +.. code-block:: console + + # Includes cohort sizes: 100, 1000, 10000, 40000 + MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -m benchmark_full test/test_h5_benchmarks.py -q + +Parallel full sweep (faster on multi-core machines) +---------------------------------------------------- + +.. 
code-block:: console + + # Requires pytest-xdist; writes per-worker CSV/meta files to avoid write races + MODELARRAYIO_S3_ANON=1 PYTHONPATH=src pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q + +Default benchmark outputs: + +* Serial runs: + * ``benchmark_results/h5_benchmark_results.csv`` + * ``benchmark_results/run_meta.json`` +* Parallel runs (``pytest -n ...``): + * ``benchmark_results/h5_benchmark_results_<worker_id>.csv`` + * ``benchmark_results/run_meta_<worker_id>.json`` + +You can override output location by setting +``MODELARRAYIO_BENCHMARK_RESULTS_DIR``. Benchmark writers and the plotting +script's default CSV auto-discovery both use this directory. + +Timing columns in the CSV: + +* ``elapsed_seconds``: end-to-end benchmark time for the row +* ``data_generation_seconds``: synthetic data generation time +* ``hdf5_write_seconds``: HDF5 dataset creation + writes + +For storage-setting comparisons (chunking/compression), prefer +``hdf5_write_seconds``; use ``elapsed_seconds`` when you want total runtime. + +Generate diagnostic plots from saved CSV +---------------------------------------- + +.. code-block:: console + + Rscript test/plot_h5_benchmarks.R + +The plotting script is implemented in R with ``ggplot2`` and currently requires +``ggplot2``, ``dplyr``, ``tidyr``, and ``patchwork``. +By default, plotting filters to a single comparable slice of results +(``run_kind`` auto-prefers full, and ``sampled_voxels`` auto-picks the max +present in that run kind). Use ``--run-kind`` and ``--sampled-voxels`` to +override, and invalid explicit filters fail with a clear error. +When ``--results-csv`` is omitted, plotting automatically loads all +``h5_benchmark_results*.csv`` files in the benchmark results directory +(``benchmark_results/`` by default, or +``MODELARRAYIO_BENCHMARK_RESULTS_DIR`` when set), including per-worker outputs +from parallel runs. 
+All scaling and trade-off plots include all available compression variants; the +compression program is encoded by color and variants from the same program +(for example ``gzip-1``, ``gzip-4``, ``gzip-9``) are differentiated by marker +shape. + +The plotting script now generates a curated faceted SVG summary: + +* ``benchmark_results/plots/h5_benchmark_summary.svg`` (analysis output) +* ``docs/_static/h5_benchmark_summary.svg`` (README figure) + +These can be regenerated later without rerunning benchmarks. The CSV includes +file size in bytes and GiB (``output_size_bytes`` and ``output_size_gb``), +plus split timing columns for generation vs write time. diff --git a/docs/index.rst b/docs/index.rst index a1d1114..b47cb12 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,5 +12,6 @@ :maxdepth: 2 auto_examples/index + benchmarking usage api diff --git a/pyproject.toml b/pyproject.toml index 5727360..060c7e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,7 @@ test = [ "pytest-cov>=5", "pytest-xdist>=3", "pytest-env>=1.0", + "matplotlib>=3.8", ] all = ["modelarrayio[doc,test,s3]"] @@ -185,6 +186,10 @@ exclude_lines = [ [tool.pytest.ini_options] markers = [ "s3: tests that require network access to public S3 (deselect with '-m not s3')", + "benchmark: benchmarks that write persistent performance artifacts", + "benchmark_quick: small, fast benchmark subset for local inspection", + "benchmark_medium: medium benchmark subset excluding the largest cohort size", + "benchmark_full: full benchmark sweep including the largest cohort size", ] log_cli = true diff --git a/src/modelarrayio/cli/h5_to_nifti.py b/src/modelarrayio/cli/h5_to_nifti.py index 0614471..2fb1153 100644 --- a/src/modelarrayio/cli/h5_to_nifti.py +++ b/src/modelarrayio/cli/h5_to_nifti.py @@ -37,7 +37,7 @@ def h5_to_nifti(in_file, analysis_name, group_mask_file, output_extension, outpu # Attempt to read column names: prefer attribute; fallback to dataset-based names def _decode_names(arr): 
try: - if isinstance(arr, (list, tuple)): + if isinstance(arr, list | tuple): seq = arr elif isinstance(arr, np.ndarray): seq = arr.tolist() @@ -45,7 +45,7 @@ def _decode_names(arr): seq = [arr] out = [] for x in seq: - if isinstance(x, (bytes, bytearray, np.bytes_)): + if isinstance(x, bytes | bytearray | np.bytes_): s = x.decode('utf-8', errors='ignore') else: s = str(x) diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..7880747 --- /dev/null +++ b/test/__init__.py @@ -0,0 +1 @@ +"""Test package for ModelArrayIO.""" diff --git a/test/benchmark_data_utils.py b/test/benchmark_data_utils.py new file mode 100644 index 0000000..832b988 --- /dev/null +++ b/test/benchmark_data_utils.py @@ -0,0 +1,383 @@ +"""Data helpers for HDF5 benchmark tests using real S3 voxel templates. + +The benchmark dataset is built from open ABIDE ALFF/mask files on S3 (same +workflow pattern as ``test/test_voxels_s3.py``), then expanded with controlled +random variation to emulate larger cohorts. 
+""" + +from __future__ import annotations + +import logging +import os +from dataclasses import dataclass +from functools import lru_cache + +import numpy as np + +from modelarrayio.utils.s3_utils import load_nibabel +from modelarrayio.utils.voxels import flattened_image + +logger = logging.getLogger(__name__) + +_U64_FLOAT_DENOM = float(2**64) +_SALT_DROPOUT = np.uint64(0xA24BAED4963EE407) +_SALT_NOISE_1 = np.uint64(0x9FB21C651E98DF25) +_SALT_NOISE_2 = np.uint64(0xC13FA9A902A6328F) +_SUBJECT_MUL = np.uint64(0x9E3779B185EBCA87) +_VOXEL_MUL = np.uint64(0xC2B2AE3D27D4EB4F) + +# Open ABIDE OHSU subjects used in test/test_voxels_s3.py +_OHSU_SUBJECTS = [ + 'OHSU_0050142', + 'OHSU_0050143', + 'OHSU_0050144', + 'OHSU_0050145', +] +_BUCKET = 'fcp-indi' +_PREFIX = 'data/Projects/ABIDE_Initiative/Outputs/cpac/filt_global' + + +@dataclass(frozen=True) +class SyntheticBenchmarkDataset: + """Container for benchmark-ready row vectors and generation metadata.""" + + rows: list[np.ndarray] + metadata: dict[str, object] + + +@dataclass(frozen=True) +class SyntheticBenchmarkPlan: + """Precomputed benchmark synthesis plan for streamed stripe generation.""" + + sampled_templates: np.ndarray + voxel_mean: np.ndarray + voxel_sd: np.ndarray + template_indices: np.ndarray + subject_scales: np.ndarray + subject_offsets: np.ndarray + dropout_probs: np.ndarray + subject_key_base_u64: np.ndarray + dropout_thresholds_u64: np.ndarray + noise_std: float + seed: int + metadata: dict[str, object] + + @property + def num_subjects(self) -> int: + return int(self.template_indices.shape[0]) + + @property + def num_items(self) -> int: + return int(self.voxel_mean.shape[0]) + + +def _s3_alff(subject_id: str) -> str: + return f's3://{_BUCKET}/{_PREFIX}/alff/{subject_id}_alff.nii.gz' + + +def _s3_mask(subject_id: str) -> str: + return f's3://{_BUCKET}/{_PREFIX}/func_mask/{subject_id}_func_mask.nii.gz' + + +@lru_cache(maxsize=2) +def _load_s3_template_rows( + allow_fallback: bool = True, +) -> 
tuple[np.ndarray, np.ndarray, tuple[int, int, int]]: + """Load and cache real ALFF template rows from S3.""" + os.environ.setdefault('MODELARRAYIO_S3_ANON', '1') + try: + group_mask_img = load_nibabel(_s3_mask(_OHSU_SUBJECTS[0])) + group_mask_matrix = group_mask_img.get_fdata() > 0 + volume_shape = tuple(int(v) for v in group_mask_img.shape[:3]) + + template_rows: list[np.ndarray] = [] + for subject in _OHSU_SUBJECTS: + scalar_img = load_nibabel(_s3_alff(subject)) + subject_mask_img = load_nibabel(_s3_mask(subject)) + row = flattened_image(scalar_img, subject_mask_img, group_mask_matrix).astype( + np.float32 + ) + template_rows.append(row) + + if not template_rows: + raise ValueError('failed to load S3 template rows') + return group_mask_matrix, np.vstack(template_rows), volume_shape + except (ImportError, OSError, RuntimeError, ValueError) as exc: + if not allow_fallback: + raise RuntimeError( + 'Failed to load required S3 benchmark templates. ' + 'Install modelarrayio[s3], enable network access, and set MODELARRAYIO_S3_ANON=1.' + ) from exc + # Keep benchmark tests runnable in environments without boto3/network. 
+ logger.warning('Falling back to local synthetic templates for benchmarks: %s', exc) + return _load_local_fallback_template_rows() + + +def _load_local_fallback_template_rows() -> tuple[np.ndarray, np.ndarray, tuple[int, int, int]]: + """Build deterministic local template rows when S3 templates are unavailable.""" + rng = np.random.default_rng(20260313) + volume_shape = (16, 16, 8) + group_mask_matrix = rng.random(volume_shape) > 0.25 + num_voxels = int(group_mask_matrix.sum()) + + base_signal = rng.normal(loc=0.0, scale=1.0, size=num_voxels).astype(np.float32) + template_rows: list[np.ndarray] = [] + for _ in _OHSU_SUBJECTS: + scale = float(rng.normal(loc=1.0, scale=0.15)) + offset = float(rng.normal(loc=0.0, scale=0.2)) + row = (base_signal * scale + offset).astype(np.float32) + dropout = rng.random(num_voxels) < 0.05 + row[dropout] = np.nan + template_rows.append(row) + return group_mask_matrix, np.vstack(template_rows), volume_shape + + +def _splitmix64(x: np.ndarray) -> np.ndarray: + """Vectorized SplitMix64 mixing for deterministic pseudo-random u64 values.""" + z = (x + np.uint64(0x9E3779B97F4A7C15)).astype(np.uint64, copy=False) + z = (z ^ (z >> np.uint64(30))) * np.uint64(0xBF58476D1CE4E5B9) + z = (z ^ (z >> np.uint64(27))) * np.uint64(0x94D049BB133111EB) + return z ^ (z >> np.uint64(31)) + + +def _u64_to_unit_float(values: np.ndarray) -> np.ndarray: + # Map uint64 to (0, 1) deterministically. + return (values.astype(np.float64) + 0.5) / _U64_FLOAT_DENOM + + +@lru_cache(maxsize=32) +def make_realistic_voxel_benchmark_plan( + num_input_files: int, + *, + seed: int = 8675309, + max_voxels: int = 0, + noise_std: float = 0.35, + dropout_range: tuple[float, float] = (0.01, 0.08), + require_s3_templates: bool = False, +) -> SyntheticBenchmarkPlan: + """Build a deterministic benchmark synthesis plan for streamed writes. + + This avoids materializing the full (subjects x items) matrix in memory. 
+ """ + if num_input_files <= 0: + raise ValueError('num_input_files must be positive') + if max_voxels < 0: + max_voxels = 0 + + min_dropout, max_dropout = dropout_range + if not (0.0 <= min_dropout <= max_dropout < 1.0): + raise ValueError('dropout_range must satisfy 0 <= min <= max < 1') + + rng = np.random.default_rng(seed) + group_mask_matrix, template_rows, volume_shape = _load_s3_template_rows( + allow_fallback=not require_s3_templates + ) + num_template_rows, total_items = template_rows.shape + if total_items == 0: + raise ValueError('S3 template rows are empty') + + if max_voxels == 0 or max_voxels >= total_items: + selected_indices = np.arange(total_items, dtype=np.int64) + else: + selected_indices = np.sort(rng.choice(total_items, size=max_voxels, replace=False)) + sampled_templates = template_rows[:, selected_indices].astype(np.float32, copy=False) + + voxel_mean = np.nanmean(sampled_templates, axis=0).astype(np.float32) + voxel_sd = np.nanstd(sampled_templates, axis=0).astype(np.float32) + finite_sd = np.isfinite(voxel_sd) & (voxel_sd > 1e-6) + sd_floor = float(np.median(voxel_sd[finite_sd])) if np.any(finite_sd) else 1.0 + voxel_sd = np.where(finite_sd, voxel_sd, sd_floor).astype(np.float32) + + finite_mean = np.isfinite(voxel_mean) + mean_fill = float(np.mean(voxel_mean[finite_mean])) if np.any(finite_mean) else 0.0 + voxel_mean = np.where(finite_mean, voxel_mean, mean_fill).astype(np.float32) + + template_indices = rng.integers(0, num_template_rows, size=num_input_files, dtype=np.int64) + subject_scales = rng.normal(loc=1.0, scale=0.12, size=num_input_files).astype(np.float32) + subject_offsets = rng.normal(loc=0.0, scale=0.25, size=num_input_files).astype(np.float32) + dropout_probs = rng.uniform(min_dropout, max_dropout, size=num_input_files).astype(np.float32) + seed_u64 = np.uint64(seed) + subject_key_base_u64 = ( + np.arange(num_input_files, dtype=np.uint64)[:, np.newaxis] * _SUBJECT_MUL + ) ^ seed_u64 + dropout_thresholds_u64 = ( + 
dropout_probs.astype(np.float64) * _U64_FLOAT_DENOM + ).astype(np.uint64)[:, np.newaxis] + + uses_s3_templates = template_rows.shape[1] > 0 and volume_shape != (16, 16, 8) + metadata: dict[str, object] = { + 'seed': seed, + 'num_input_files': num_input_files, + 'volume_shape': list(volume_shape), + 'group_mask_voxels': int(group_mask_matrix.sum()), + 'sampled_voxels': int(selected_indices.shape[0]), + 'noise_std': float(noise_std), + 'adaptive_sd_median': float(np.median(voxel_sd)), + 'adaptive_sd_mean': float(np.mean(voxel_sd)), + 'dropout_range': [float(min_dropout), float(max_dropout)], + # Filled after streamed generation/writing. + 'mean_missing_fraction': float('nan'), + 'std_missing_fraction': float('nan'), + 'template_subjects': _OHSU_SUBJECTS if uses_s3_templates else ['local_synthetic_template'], + 'workflow_reference': ( + 'Open S3 ABIDE ALFF + func-mask templates with per-voxel mean/SD adaptive variation' + if uses_s3_templates + else 'Local synthetic fallback templates with per-voxel mean/SD adaptive variation' + ), + } + return SyntheticBenchmarkPlan( + sampled_templates=sampled_templates, + voxel_mean=voxel_mean, + voxel_sd=voxel_sd, + template_indices=template_indices, + subject_scales=subject_scales, + subject_offsets=subject_offsets, + dropout_probs=dropout_probs, + subject_key_base_u64=subject_key_base_u64, + dropout_thresholds_u64=dropout_thresholds_u64, + noise_std=float(noise_std), + seed=int(seed), + metadata=metadata, + ) + + +def fill_realistic_voxel_stripe( + plan: SyntheticBenchmarkPlan, + *, + start: int, + end: int, + out: np.ndarray, +) -> np.ndarray: + """Fill ``out`` with synthetic values for a column stripe and return NaN counts. + + Parameters + ---------- + plan + Precomputed synthesis plan. + start, end + Stripe bounds in [0, num_items]. + out + Destination array with shape (num_subjects, >= stripe_width), dtype float32. 
+ """ + if start < 0 or end <= start or end > plan.num_items: + raise ValueError(f'invalid stripe bounds: start={start}, end={end}, num_items={plan.num_items}') + stripe_width = end - start + if out.shape[0] != plan.num_subjects or out.shape[1] < stripe_width: + raise ValueError( + f'out shape {out.shape} incompatible with plan (subjects={plan.num_subjects}, ' + f'stripe_width={stripe_width})' + ) + + view = out[:, :stripe_width] + mean_slice = plan.voxel_mean[start:end] + sd_slice = plan.voxel_sd[start:end] + template_slice = plan.sampled_templates[plan.template_indices, start:end] + + view[:, :] = ( + mean_slice[np.newaxis, :] + + plan.subject_scales[:, np.newaxis] * (template_slice - mean_slice[np.newaxis, :]) + + plan.subject_offsets[:, np.newaxis] * sd_slice[np.newaxis, :] + ) + + # Stateless RNG keyed by (seed, subject, voxel) keeps values invariant + # across stripe/chunk geometry. + voxel_ids = np.arange(start, end, dtype=np.uint64)[np.newaxis, :] + key_matrix = plan.subject_key_base_u64 ^ (voxel_ids * _VOXEL_MUL) + + dropout_hash = _splitmix64(key_matrix ^ _SALT_DROPOUT) + drop_mask = dropout_hash < plan.dropout_thresholds_u64 + view[drop_mask] = np.nan + + valid_mask = np.isfinite(view) + if np.any(valid_mask): + u1 = _u64_to_unit_float(_splitmix64(key_matrix ^ _SALT_NOISE_1)) + u2 = _u64_to_unit_float(_splitmix64(key_matrix ^ _SALT_NOISE_2)) + u1 = np.clip(u1, 1e-12, 1.0) + noise = np.sqrt(-2.0 * np.log(u1)) * np.cos(2.0 * np.pi * u2) + scaled_noise = noise.astype(np.float32, copy=False) * (plan.noise_std * sd_slice)[ + np.newaxis, : + ] + view[valid_mask] = view[valid_mask] + scaled_noise[valid_mask] + + return np.count_nonzero(np.isnan(view), axis=1) + + +def finalize_plan_metadata( + plan: SyntheticBenchmarkPlan, + *, + mean_missing_fraction: float, + std_missing_fraction: float, +) -> dict[str, object]: + metadata = dict(plan.metadata) + metadata['mean_missing_fraction'] = float(mean_missing_fraction) + metadata['std_missing_fraction'] = 
float(std_missing_fraction) + return metadata + + +def make_realistic_voxel_benchmark_dataset( + num_input_files: int, + *, + seed: int = 8675309, + max_voxels: int = 0, + noise_std: float = 0.35, + dropout_range: tuple[float, float] = (0.01, 0.08), + require_s3_templates: bool = False, +) -> SyntheticBenchmarkDataset: + """Generate benchmark rows from real S3 ALFF templates plus random variation. + + Parameters + ---------- + num_input_files + Number of synthetic subjects / input files to emulate. + seed + Seed for deterministic generation. + max_voxels + Max number of group-mask voxels sampled into the benchmark matrix. + ``0`` uses the full group mask. + noise_std + Additive noise scale in units of per-voxel SD estimated from S3 templates. + dropout_range + Inclusive range for per-subject mask dropout probability. + """ + if num_input_files <= 0: + raise ValueError('num_input_files must be positive') + if max_voxels < 0: + max_voxels = 0 + + plan = make_realistic_voxel_benchmark_plan( + num_input_files=num_input_files, + seed=seed, + max_voxels=max_voxels, + noise_std=noise_std, + dropout_range=dropout_range, + require_s3_templates=require_s3_templates, + ) + rows: list[np.ndarray] = [] + for subject_idx in range(plan.num_subjects): + row = ( + plan.voxel_mean + + plan.subject_scales[subject_idx] * (plan.sampled_templates[plan.template_indices[subject_idx]] - plan.voxel_mean) + + plan.subject_offsets[subject_idx] * plan.voxel_sd + ).astype(np.float32, copy=False) + + row_seed = int( + (np.uint64(plan.seed) << np.uint64(1)) ^ np.uint64(subject_idx + 0xA5A5A5A5) + ) + row_rng = np.random.default_rng(row_seed) + row[row_rng.random(plan.num_items, dtype=np.float32) < plan.dropout_probs[subject_idx]] = np.nan + + valid = np.isfinite(row) + if np.any(valid): + noise = row_rng.normal(loc=0.0, scale=1.0, size=plan.num_items).astype( + np.float32, copy=False + ) + row[valid] = row[valid] + noise[valid] * (plan.noise_std * plan.voxel_sd[valid]) + rows.append(row) + + 
missing_fractions = np.array([float(np.isnan(row).mean()) for row in rows], dtype=np.float32) + metadata = finalize_plan_metadata( + plan, + mean_missing_fraction=float(np.mean(missing_fractions)), + std_missing_fraction=float(np.std(missing_fractions)), + ) + return SyntheticBenchmarkDataset(rows=rows, metadata=metadata) diff --git a/test/plot_h5_benchmarks.R b/test/plot_h5_benchmarks.R new file mode 100644 index 0000000..4551acd --- /dev/null +++ b/test/plot_h5_benchmarks.R @@ -0,0 +1,891 @@ +#!/usr/bin/env Rscript + +required_packages <- c('ggplot2', 'dplyr', 'tidyr', 'patchwork') +missing_packages <- required_packages[ + !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE) +] +if (length(missing_packages) > 0) { + stop( + sprintf( + 'Missing required R packages: %s', + paste(missing_packages, collapse = ', ') + ), + call. = FALSE + ) +} + +suppressPackageStartupMessages({ + library(ggplot2) + library(dplyr) + library(tidyr) + library(patchwork) +}) + +theme_set( + theme_bw(base_size = 16) + + theme( + plot.title = element_text(size = 18, face = 'bold'), + axis.title = element_text(size = 16), + axis.text = element_text(size = 14), + legend.title = element_text(size = 14), + legend.text = element_text(size = 13), + strip.text = element_text(size = 14) + ) +) + +script_path <- function() { + command_args <- commandArgs(trailingOnly = FALSE) + file_arg <- grep('^--file=', command_args, value = TRUE) + if (length(file_arg) > 0) { + return(normalizePath(sub('^--file=', '', file_arg[[1]]), mustWork = FALSE)) + } + if (!is.null(sys.frames()[[1]]$ofile)) { + return(normalizePath(sys.frames()[[1]]$ofile, mustWork = FALSE)) + } + normalizePath('test/plot_h5_benchmarks.R', mustWork = FALSE) +} + +project_root <- function() { + dirname(dirname(script_path())) +} + +benchmark_results_dir <- function() { + configured <- Sys.getenv('MODELARRAYIO_BENCHMARK_RESULTS_DIR', unset = '') + if (nzchar(configured)) { + return(normalizePath(path.expand(configured), 
mustWork = FALSE)) + } + file.path(project_root(), 'benchmark_results') +} + +default_results_csv_candidates <- function() { + results_dir <- benchmark_results_dir() + candidates <- sort( + list.files( + path = results_dir, + pattern = '^h5_benchmark_results.*\\.csv$', + full.names = TRUE + ) + ) + if (length(candidates) > 0) { + return(candidates) + } + file.path(results_dir, 'h5_benchmark_results.csv') +} + +default_plot_dir <- function() { + file.path(benchmark_results_dir(), 'plots') +} + +default_readme_svg <- function() { + file.path(project_root(), 'docs', '_static', 'h5_benchmark_summary.svg') +} + +print_help <- function() { + cat( + paste( + 'Generate diagnostic plots for HDF5 benchmark runs (R + ggplot2).', + '', + 'Usage:', + ' Rscript test/plot_h5_benchmarks.R [options]', + '', + 'Options:', + ' --results-csv [PATH ...] One or more benchmark CSV files. If omitted,', + ' auto-loads benchmark_results/h5_benchmark_results*.csv.', + ' --output-dir PATH Directory where plot images are written.', + ' --readme-svg PATH Path where README summary SVG is written.', + ' --run-kind VALUE One of: auto, all, quick, medium, full.', + ' --sampled-voxels INT Filter by sampled_voxels (-1 selects max available).', + ' -h, --help Show this help and exit.', + sep = '\n' + ) + ) +} + +parse_cli_args <- function(args) { + opts <- list( + results_csv = NULL, + output_dir = default_plot_dir(), + readme_svg = default_readme_svg(), + run_kind = 'auto', + sampled_voxels = -1L + ) + + i <- 1L + while (i <= length(args)) { + arg <- args[[i]] + if (arg %in% c('-h', '--help')) { + print_help() + quit(save = 'no', status = 0L) + } + + if (arg == '--results-csv') { + i <- i + 1L + values <- character() + while (i <= length(args) && !startsWith(args[[i]], '--')) { + values <- c(values, args[[i]]) + i <- i + 1L + } + opts$results_csv <- values + next + } + + if (arg == '--output-dir') { + if (i >= length(args)) { + stop('--output-dir requires a value', call. 
= FALSE) + } + i <- i + 1L + opts$output_dir <- args[[i]] + i <- i + 1L + next + } + + if (arg == '--readme-svg') { + if (i >= length(args)) { + stop('--readme-svg requires a value', call. = FALSE) + } + i <- i + 1L + opts$readme_svg <- args[[i]] + i <- i + 1L + next + } + + if (arg == '--run-kind') { + if (i >= length(args)) { + stop('--run-kind requires a value', call. = FALSE) + } + i <- i + 1L + opts$run_kind <- args[[i]] + i <- i + 1L + next + } + + if (arg == '--sampled-voxels') { + if (i >= length(args)) { + stop('--sampled-voxels requires a value', call. = FALSE) + } + i <- i + 1L + opts$sampled_voxels <- suppressWarnings(as.integer(args[[i]])) + if (is.na(opts$sampled_voxels)) { + stop('--sampled-voxels must be an integer', call. = FALSE) + } + i <- i + 1L + next + } + + stop(sprintf('Unknown argument: %s', arg), call. = FALSE) + } + + valid_run_kinds <- c('auto', 'all', 'quick', 'medium', 'full') + if (!(opts$run_kind %in% valid_run_kinds)) { + stop( + sprintf( + '--run-kind must be one of: %s', + paste(valid_run_kinds, collapse = ', ') + ), + call. = FALSE + ) + } + + opts +} + +load_results <- function(paths) { + if (length(paths) == 0) { + stop('No benchmark results CSV paths were provided.', call. = FALSE) + } + + data_frames <- list() + missing_paths <- character() + for (path in paths) { + if (!file.exists(path)) { + missing_paths <- c(missing_paths, path) + next + } + data_frame <- read.csv(path, stringsAsFactors = FALSE) + if (nrow(data_frame) > 0) { + data_frame$results_csv_path <- path + data_frames[[length(data_frames) + 1L]] <- data_frame + } + } + + if (length(data_frames) == 0) { + if (length(missing_paths) > 0) { + stop( + sprintf( + 'benchmark results CSV not found: %s', + paste(missing_paths, collapse = ', ') + ), + call. = FALSE + ) + } + stop('benchmark results CSV is empty', call. 
= FALSE) + } + + data_frame <- bind_rows(data_frames) + numeric_columns <- c( + 'num_input_files', + 'target_chunk_mb', + 'compression_level', + 'shuffle', + 'chunk_subjects', + 'chunk_items', + 'elapsed_seconds', + 'data_generation_seconds', + 'hdf5_write_seconds', + 'output_size_bytes', + 'output_size_gb', + 'throughput_values_per_second', + 'throughput_mb_per_second', + 'group_mask_voxels', + 'sampled_voxels', + 'mean_missing_fraction', + 'std_missing_fraction' + ) + for (column in numeric_columns) { + if (column %in% names(data_frame)) { + data_frame[[column]] <- suppressWarnings(as.numeric(data_frame[[column]])) + } + } + + if (!('output_size_gb' %in% names(data_frame))) { + data_frame$output_size_gb <- data_frame$output_size_bytes / (1024.0^3) + } + if ('shuffle' %in% names(data_frame)) { + data_frame$shuffle_label <- ifelse( + data_frame$shuffle == 1, + 'on', + ifelse(data_frame$shuffle == 0, 'off', 'unknown') + ) + } + + filtered <- data_frame |> + filter( + !is.na(num_input_files), + !is.na(target_chunk_mb), + !is.na(elapsed_seconds), + !is.na(output_size_bytes) + ) + if (nrow(filtered) == 0) { + stop( + 'No benchmark rows contained required columns after CSV loading.', + call. 
= FALSE + ) + } + filtered +} + +auto_run_kind <- function(data_frame) { + if (!('run_kind' %in% names(data_frame))) { + return(NULL) + } + run_kinds <- sort(unique(as.character(data_frame$run_kind[!is.na(data_frame$run_kind)]))) + for (preferred in c('full', 'medium', 'quick')) { + if (preferred %in% run_kinds) { + return(preferred) + } + } + if (length(run_kinds) > 0) { + return(run_kinds[[length(run_kinds)]]) + } + NULL +} + +filter_comparable_results <- function(data_frame, run_kind, sampled_voxels) { + filtered <- data_frame + details <- character() + + if (run_kind != 'all' && 'run_kind' %in% names(filtered)) { + selected_run_kind <- if (run_kind == 'auto') { + auto_run_kind(filtered) + } else { + run_kind + } + if (!is.null(selected_run_kind)) { + subset <- filtered[filtered$run_kind == selected_run_kind, , drop = FALSE] + if (nrow(subset) == 0) { + if (run_kind != 'auto') { + stop( + sprintf('No rows found for requested run_kind=%s', run_kind), + call. = FALSE + ) + } + } else { + filtered <- subset + details <- c(details, sprintf('run_kind=%s', selected_run_kind)) + } + } + } + + if ('sampled_voxels' %in% names(filtered) && any(!is.na(filtered$sampled_voxels))) { + target_sampled_voxels <- if (sampled_voxels < 0) { + as.integer(max(filtered$sampled_voxels, na.rm = TRUE)) + } else { + as.integer(sampled_voxels) + } + subset <- filtered[filtered$sampled_voxels == target_sampled_voxels, , drop = FALSE] + if (nrow(subset) == 0) { + if (sampled_voxels >= 0) { + stop( + sprintf( + 'No rows found for requested sampled_voxels=%s', + target_sampled_voxels + ), + call. = FALSE + ) + } + } else { + filtered <- subset + details <- c(details, sprintf('sampled_voxels=%s', target_sampled_voxels)) + } + } + + if (nrow(filtered) == 0) { + stop( + 'No benchmark rows left after filtering for comparable runs.', + call. 
= FALSE + ) + } + + label <- if (length(details) > 0) { + paste(details, collapse = ', ') + } else { + 'all available rows' + } + list(data = filtered, label = label) +} + +annotate_compression_fields <- function(data_frame) { + if (!('compression' %in% names(data_frame))) { + stop("CSV must contain a 'compression' column.", call. = FALSE) + } + if (!('compression_level' %in% names(data_frame))) { + data_frame$compression_level <- NA_real_ + } + + data_frame <- data_frame |> + mutate( + compression_program = as.character(compression), + compression_level_num = suppressWarnings(as.integer(compression_level)), + compression_variant = ifelse( + compression_program == 'gzip' & !is.na(compression_level_num), + paste0('gzip-', compression_level_num), + compression_program + ) + ) + + preferred_program_order <- c('none', 'lzf', 'gzip') + seen_programs <- sort(unique(data_frame$compression_program)) + program_levels <- c( + preferred_program_order[preferred_program_order %in% seen_programs], + sort(setdiff(seen_programs, preferred_program_order)) + ) + + variant_table <- data_frame |> + distinct(compression_program, compression_variant, compression_level_num) |> + mutate( + program_rank = match(compression_program, program_levels), + level_rank = ifelse(compression_program == 'gzip', compression_level_num, -1L) + ) |> + arrange(program_rank, level_rank, compression_variant) + variant_levels <- unique(variant_table$compression_variant) + + data_frame$compression_program <- factor( + data_frame$compression_program, + levels = program_levels + ) + data_frame$compression_variant <- factor( + data_frame$compression_variant, + levels = variant_levels + ) + + list( + data = data_frame, + program_levels = program_levels, + variant_levels = variant_levels + ) +} + +program_palette <- function(program_levels) { + fixed <- c(none = '#2ca02c', lzf = '#ff7f0e', gzip = '#1f77b4') + palette <- fixed[names(fixed) %in% program_levels] + unknown_levels <- setdiff(program_levels, 
names(palette)) + if (length(unknown_levels) > 0) { + palette <- c(palette, stats::setNames(rep('#7f7f7f', length(unknown_levels)), unknown_levels)) + } + palette[program_levels] +} + +variant_shape_map <- function(variant_levels) { + base_shapes <- c(16, 17, 15, 18, 3, 7, 8, 9, 10, 12, 13, 14, 0, 1, 2, 4, 5, 6, 11) + stats::setNames(rep(base_shapes, length.out = length(variant_levels)), variant_levels) +} + +apply_compression_scales <- function(plot, program_colors, shape_values) { + plot + + scale_color_manual( + values = program_colors, + name = 'Compression program', + drop = FALSE + ) + + scale_shape_manual( + values = shape_values, + name = 'Compression variant', + drop = FALSE + ) +} + +build_line_metric_plot <- function( + data_frame, + metric, + y_label, + title, + program_colors, + shape_values +) { + grouped <- data_frame |> + group_by(num_input_files, compression_program, compression_variant) |> + summarise(metric_value = median(.data[[metric]], na.rm = TRUE), .groups = 'drop') |> + arrange(num_input_files) + + plot <- ggplot( + grouped, + aes( + x = num_input_files, + y = metric_value, + color = compression_program, + shape = compression_variant, + group = compression_variant + ) + ) + + geom_line(linewidth = 0.7, show.legend = FALSE) + + geom_point(size = 2.2) + + scale_x_log10() + + labs( + title = title, + x = 'Number of input files', + y = y_label + ) + + theme(legend.position = 'bottom') + + apply_compression_scales(plot, program_colors, shape_values) +} + +build_pareto_plot <- function(data_frame, program_colors, shape_values) { + plot <- ggplot( + data_frame, + aes( + x = elapsed_seconds, + y = output_size_gb, + color = compression_program, + shape = compression_variant + ) + ) + + geom_point(alpha = 0.65, size = 2.2) + + labs( + title = 'Pareto view: write time vs output size by compression variant (all runs)', + x = 'Write time (seconds)', + y = 'Output size (GiB)' + ) + + theme(legend.position = 'bottom') + + apply_compression_scales(plot, 
program_colors, shape_values) +} + +build_chunk_geometry_plot <- function(data_frame) { + grouped <- data_frame |> + group_by(num_input_files, target_chunk_mb) |> + summarise( + chunk_items = median(chunk_items, na.rm = TRUE), + chunk_subjects = median(chunk_subjects, na.rm = TRUE), + .groups = 'drop' + ) |> + pivot_longer( + cols = c(chunk_items, chunk_subjects), + names_to = 'metric', + values_to = 'metric_value' + ) + + label_map <- c( + chunk_items = 'Chunk items vs target chunk size', + chunk_subjects = 'Chunk subjects vs target chunk size' + ) + grouped$metric_label <- factor(grouped$metric, levels = names(label_map), labels = label_map) + + ggplot( + grouped, + aes( + x = target_chunk_mb, + y = metric_value, + color = factor(num_input_files), + group = factor(num_input_files) + ) + ) + + geom_line(linewidth = 0.7) + + geom_point(size = 2) + + facet_wrap(~metric_label, nrow = 1, scales = 'free_y') + + labs( + title = 'Chunk size across cohort sizes', + x = 'Target chunk size (MiB)', + y = 'Median chunk size (count)', + color = 'num_input_files' + ) + + theme(legend.position = 'bottom') +} + +build_chunk_tradeoff_plot <- function(data_frame, program_colors, shape_values) { + grouped <- data_frame |> + group_by(compression_program, compression_variant, target_chunk_mb) |> + summarise( + elapsed_seconds = median(elapsed_seconds, na.rm = TRUE), + output_size_gb = median(output_size_gb, na.rm = TRUE), + .groups = 'drop' + ) |> + pivot_longer( + cols = c(elapsed_seconds, output_size_gb), + names_to = 'metric', + values_to = 'metric_value' + ) + + label_map <- c( + elapsed_seconds = 'Write time vs chunk target (seconds)', + output_size_gb = 'Output size vs chunk target (GiB)' + ) + grouped$metric_label <- factor(grouped$metric, levels = names(label_map), labels = label_map) + + plot <- ggplot( + grouped, + aes( + x = target_chunk_mb, + y = metric_value, + color = compression_program, + shape = compression_variant, + group = compression_variant + ) + ) + + 
geom_line(linewidth = 0.7) + + geom_point(size = 2.2) + + facet_wrap(~metric_label, nrow = 1, scales = 'free_y') + + labs( + title = 'Chunk target trade-off by compression variant', + x = 'Target chunk size (MiB)', + y = NULL + ) + + theme( + legend.position = 'bottom', + axis.title.y = element_blank() + ) + + apply_compression_scales(plot, program_colors, shape_values) +} + +build_gzip_level_plot <- function(data_frame) { + gzip_frame <- data_frame |> + filter(compression_program == 'gzip') + if (nrow(gzip_frame) == 0) { + return(NULL) + } + + grouped <- gzip_frame |> + group_by(compression_level_num, target_chunk_mb) |> + summarise( + elapsed_seconds = median(elapsed_seconds, na.rm = TRUE), + output_size_gb = median(output_size_gb, na.rm = TRUE), + .groups = 'drop' + ) |> + mutate(compression_level_label = paste0('gzip-', compression_level_num)) |> + pivot_longer( + cols = c(elapsed_seconds, output_size_gb), + names_to = 'metric', + values_to = 'metric_value' + ) + + label_map <- c( + elapsed_seconds = 'Gzip level effect on write time (seconds)', + output_size_gb = 'Gzip level effect on output size (GiB)' + ) + grouped$metric_label <- factor(grouped$metric, levels = names(label_map), labels = label_map) + + ggplot( + grouped, + aes( + x = target_chunk_mb, + y = metric_value, + color = compression_level_label, + shape = compression_level_label, + group = compression_level_label + ) + ) + + geom_line(linewidth = 0.7) + + geom_point(size = 2.2) + + facet_wrap(~metric_label, nrow = 1, scales = 'free_y') + + labs( + title = 'Gzip level trade-off by chunk size', + x = 'Target chunk size (MiB)', + y = NULL, + color = 'compression_level', + shape = 'compression_level' + ) + + theme( + legend.position = 'bottom', + axis.title.y = element_blank() + ) +} + +build_shuffle_effect_plot <- function(data_frame, program_colors, shape_values) { + if (!('shuffle_label' %in% names(data_frame))) { + return(NULL) + } + + grouped <- data_frame |> + filter(shuffle_label %in% c('on', 
'off')) |> + group_by(compression_program, compression_variant, shuffle_label) |> + summarise( + elapsed_seconds = median(elapsed_seconds, na.rm = TRUE), + output_size_gb = median(output_size_gb, na.rm = TRUE), + throughput_mb_per_second = median(throughput_mb_per_second, na.rm = TRUE), + .groups = 'drop' + ) + if (nrow(grouped) == 0) { + return(NULL) + } + + grouped$shuffle_label <- factor(grouped$shuffle_label, levels = c('off', 'on')) + metric_labels <- c( + elapsed_seconds = 'Write time (seconds)', + output_size_gb = 'Output size (GiB)', + throughput_mb_per_second = 'Throughput (MiB/sec)' + ) + long_grouped <- grouped |> + pivot_longer( + cols = c(elapsed_seconds, output_size_gb, throughput_mb_per_second), + names_to = 'metric', + values_to = 'metric_value' + ) + long_grouped$metric_label <- factor( + long_grouped$metric, + levels = names(metric_labels), + labels = metric_labels + ) + + plot <- ggplot( + long_grouped, + aes( + x = shuffle_label, + y = metric_value, + color = compression_program, + shape = compression_variant, + group = compression_variant + ) + ) + + geom_line(linewidth = 0.7) + + geom_point(size = 2.2) + + facet_wrap(~metric_label, nrow = 1, scales = 'free_y') + + labs( + title = 'Shuffle effect by compression variant (median across all benchmark rows)', + x = 'Shuffle', + y = NULL + ) + + theme( + legend.position = 'bottom', + axis.title.y = element_blank(), + axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.line.y = element_blank() + ) + + apply_compression_scales(plot, program_colors, shape_values) +} + +save_plot_svg <- function(plot, path, width, height) { + dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE) + grDevices::svg(filename = path, width = width, height = height) + print(plot) + grDevices::dev.off() +} + +main <- function() { + args <- parse_cli_args(commandArgs(trailingOnly = TRUE)) + selected_results_csv <- args$results_csv + if (is.null(selected_results_csv) || 
length(selected_results_csv) == 0) { + selected_results_csv <- default_results_csv_candidates() + } + + data_frame <- load_results(selected_results_csv) + filtered <- filter_comparable_results( + data_frame = data_frame, + run_kind = args$run_kind, + sampled_voxels = args$sampled_voxels + ) + data_frame <- filtered$data + selection_label <- filtered$label + + compression_info <- annotate_compression_fields(data_frame) + data_frame <- compression_info$data + program_colors <- program_palette(compression_info$program_levels) + shape_values <- variant_shape_map(compression_info$variant_levels) + + p_scaling_time <- build_line_metric_plot( + data_frame = data_frame, + metric = 'elapsed_seconds', + y_label = 'Write time (seconds)', + title = 'Median write time vs number of input files', + program_colors = program_colors, + shape_values = shape_values + ) + p_scaling_size <- build_line_metric_plot( + data_frame = data_frame, + metric = 'output_size_gb', + y_label = 'Output size (GiB)', + title = 'Median output size vs number of input files', + program_colors = program_colors, + shape_values = shape_values + ) + p_scaling_tput_values <- build_line_metric_plot( + data_frame = data_frame, + metric = 'throughput_values_per_second', + y_label = 'Throughput (values/sec)', + title = 'Median throughput (values/sec) vs number of input files', + program_colors = program_colors, + shape_values = shape_values + ) + p_scaling_tput_mib <- build_line_metric_plot( + data_frame = data_frame, + metric = 'throughput_mb_per_second', + y_label = 'Throughput (MiB/sec)', + title = 'Median throughput (MiB/sec) vs number of input files', + program_colors = program_colors, + shape_values = shape_values + ) + p_pareto <- build_pareto_plot( + data_frame = data_frame, + program_colors = program_colors, + shape_values = shape_values + ) + p_chunk_geometry <- build_chunk_geometry_plot(data_frame) + p_chunk_tradeoff <- build_chunk_tradeoff_plot( + data_frame = data_frame, + program_colors = 
program_colors, + shape_values = shape_values + ) + p_gzip_level <- build_gzip_level_plot(data_frame) + p_shuffle <- build_shuffle_effect_plot( + data_frame = data_frame, + program_colors = program_colors, + shape_values = shape_values + ) + + p_scaling_size_summary <- p_scaling_size + + guides(color = 'none', shape = 'none') + p_scaling_tput_summary <- p_scaling_tput_mib + + labs(title = 'Median throughput (MiB/sec)\nvs number of input files') + + guides(color = 'none', shape = 'none') + p_pareto_summary <- p_pareto + + labs(title = 'Pareto view:\nwrite time vs output size (all runs)') + + guides(color = 'none', shape = 'none') + p_shuffle_summary <- p_shuffle + + labs(title = 'Shuffle effect by compression variant') + + guides(color = 'none', shape = 'none') + + summary_plot <- ( + (p_scaling_time + p_scaling_size_summary) / + (p_scaling_tput_summary + p_pareto_summary) / + p_shuffle_summary + ) + + plot_layout(guides = 'collect') + + plot_annotation( + title = sprintf('HDF5 benchmark summary (%s)', selection_label) + ) & + theme( + legend.position = 'bottom', + legend.justification = 'center', + legend.direction = 'horizontal', + legend.box = 'horizontal', + legend.box.just = 'center', + legend.key.size = grid::unit(0.8, 'lines'), + legend.spacing.y = grid::unit(0.15, 'cm'), + plot.margin = margin(t = 8, r = 8, b = 16, l = 8) + ) + + output_dir <- normalizePath(args$output_dir, mustWork = FALSE) + readme_svg <- normalizePath(args$readme_svg, mustWork = FALSE) + + save_plot_svg( + plot = summary_plot, + path = file.path(output_dir, 'h5_benchmark_summary.svg'), + width = 13, + height = 13 + ) + save_plot_svg( + plot = summary_plot, + path = readme_svg, + width = 13, + height = 13 + ) + save_plot_svg( + plot = p_scaling_time, + path = file.path(output_dir, 'scaling_time_vs_inputs.svg'), + width = 8, + height = 5 + ) + save_plot_svg( + plot = p_scaling_size, + path = file.path(output_dir, 'scaling_size_vs_inputs.svg'), + width = 8, + height = 5 + ) + save_plot_svg( 
+ plot = p_scaling_tput_values, + path = file.path(output_dir, 'scaling_throughput_values_vs_inputs.svg'), + width = 8, + height = 5 + ) + save_plot_svg( + plot = p_scaling_tput_mib, + path = file.path(output_dir, 'scaling_throughput_mib_vs_inputs.svg'), + width = 8, + height = 5 + ) + save_plot_svg( + plot = p_pareto, + path = file.path(output_dir, 'pareto_size_vs_time.svg'), + width = 8, + height = 6 + ) + save_plot_svg( + plot = p_chunk_geometry, + path = file.path(output_dir, 'chunk_geometry_vs_target_chunk.svg'), + width = 12, + height = 4.5 + ) + save_plot_svg( + plot = p_chunk_tradeoff, + path = file.path(output_dir, 'chunk_tradeoff_time_and_size.svg'), + width = 12, + height = 4.5 + ) + if (!is.null(p_gzip_level)) { + save_plot_svg( + plot = p_gzip_level, + path = file.path(output_dir, 'gzip_level_tradeoff_time_and_size.svg'), + width = 12, + height = 4.5 + ) + } + if (!is.null(p_shuffle)) { + save_plot_svg( + plot = p_shuffle, + path = file.path(output_dir, 'shuffle_effect_summary.svg'), + width = 15, + height = 4.5 + ) + } + + cat(sprintf('Wrote SVG plots to %s\n', output_dir)) + cat(sprintf('Updated README SVG summary at %s\n', readme_svg)) + cat(sprintf('Loaded CSV files: %s\n', paste(selected_results_csv, collapse = ', '))) + cat(sprintf('Plot row filter: %s\n', selection_label)) + 0L +} + +status <- main() +quit(save = 'no', status = status) diff --git a/test/test_h5_benchmarks.py b/test/test_h5_benchmarks.py new file mode 100644 index 0000000..bd6d5d9 --- /dev/null +++ b/test/test_h5_benchmarks.py @@ -0,0 +1,403 @@ +"""HDF5 benchmark sweeps with persisted artifacts for later analysis.""" + +from __future__ import annotations + +import csv +import json +import os +import platform +import sys +import time +from datetime import UTC, datetime +from pathlib import Path + +import h5py +import numpy as np +import pytest + +from modelarrayio.storage.h5_storage import ( + create_empty_scalar_matrix_dataset, +) +from test.benchmark_data_utils import ( + 
fill_realistic_voxel_stripe, + finalize_plan_metadata, + make_realistic_voxel_benchmark_plan, +) + +RESULT_COLUMNS = [ + 'timestamp_utc', + 'run_kind', + 'num_input_files', + 'target_chunk_mb', + 'compression', + 'compression_level', + 'shuffle', + 'dtype', + 'num_items', + 'chunk_subjects', + 'chunk_items', + 'elapsed_seconds', + 'data_generation_seconds', + 'hdf5_write_seconds', + 'output_size_bytes', + 'output_size_gb', + 'throughput_values_per_second', + 'throughput_mb_per_second', + 'seed', + 'volume_shape', + 'group_mask_voxels', + 'sampled_voxels', + 'noise_std', + 'dropout_range', + 'mean_missing_fraction', + 'std_missing_fraction', + 'workflow_reference', + 'python_version', + 'h5py_version', + 'platform', +] + +_SCHEMA_VALIDATED_PATHS: set[Path] = set() +_BENCHMARK_SEED_BASE = 20260313 + + +def _project_root() -> Path: + return Path(__file__).resolve().parents[1] + + +def _benchmark_results_dir() -> Path: + configured = os.environ.get('MODELARRAYIO_BENCHMARK_RESULTS_DIR') + if configured: + return Path(configured).expanduser().resolve() + return _project_root() / 'benchmark_results' + + +def _xdist_worker_id() -> str | None: + worker = os.environ.get('PYTEST_XDIST_WORKER') + return worker if worker else None + + +def _results_csv_path(results_dir: Path) -> Path: + worker = _xdist_worker_id() + if worker: + return results_dir / f'h5_benchmark_results_{worker}.csv' + return results_dir / 'h5_benchmark_results.csv' + + +def _run_meta_path(results_dir: Path) -> Path: + worker = _xdist_worker_id() + if worker: + return results_dir / f'run_meta_{worker}.json' + return results_dir / 'run_meta.json' + + +def _append_csv_row(csv_path: Path, row: dict[str, object]) -> None: + _append_csv_rows(csv_path, [row]) + + +def _append_csv_rows(csv_path: Path, rows: list[dict[str, object]]) -> None: + if not rows: + return + csv_path = csv_path.resolve() + if csv_path not in _SCHEMA_VALIDATED_PATHS: + _ensure_csv_schema(csv_path) + _SCHEMA_VALIDATED_PATHS.add(csv_path) + 
csv_path.parent.mkdir(parents=True, exist_ok=True) + needs_header = not csv_path.exists() + with csv_path.open('a', newline='', encoding='utf-8') as f: + writer = csv.DictWriter(f, fieldnames=RESULT_COLUMNS) + if needs_header: + writer.writeheader() + writer.writerows(rows) + + +def _ensure_csv_schema(csv_path: Path) -> None: + """Migrate older benchmark CSV headers to the current schema in place.""" + if not csv_path.exists(): + return + + with csv_path.open('r', newline='', encoding='utf-8') as f: + rows = list(csv.reader(f)) + if not rows: + return + + old_header = rows[0] + if old_header == RESULT_COLUMNS and all(len(r) == len(RESULT_COLUMNS) for r in rows[1:]): + return + + migrated_rows: list[dict[str, object]] = [] + for values in rows[1:]: + if not values: + continue + old_map = { + key: values[idx] if idx < len(values) else '' for idx, key in enumerate(old_header) + } + migrated_rows.append({column: old_map.get(column, '') for column in RESULT_COLUMNS}) + + with csv_path.open('w', newline='', encoding='utf-8') as f: + writer = csv.DictWriter(f, fieldnames=RESULT_COLUMNS) + writer.writeheader() + writer.writerows(migrated_rows) + + +def _update_run_meta(meta_path: Path, run_kind: str, csv_path: Path) -> None: + benchmark_env_prefix = 'PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 MODELARRAYIO_S3_ANON=1 PYTHONPATH=src' + payload = { + 'updated_utc': datetime.now(UTC).isoformat(), + 'last_run_kind': run_kind, + 'csv_path': str(csv_path), + 'plots_dir': str(meta_path.parent / 'plots'), + 'commands': { + 'quick': ( + f'{benchmark_env_prefix} pytest -m benchmark_quick test/test_h5_benchmarks.py -q' + ), + 'medium': ( + f'{benchmark_env_prefix} pytest -m benchmark_medium test/test_h5_benchmarks.py -q' + ), + 'full': f'{benchmark_env_prefix} pytest -m benchmark_full test/test_h5_benchmarks.py -q', + 'parallel': ( + f'{benchmark_env_prefix} pytest -n auto -m benchmark_full test/test_h5_benchmarks.py -q' + ), + 'plot': 'Rscript test/plot_h5_benchmarks.R', + }, + } + 
meta_path.parent.mkdir(parents=True, exist_ok=True) + meta_path.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding='utf-8') + + +def _benchmark_seed(num_input_files: int) -> int: + """Use one deterministic seed per cohort size for fair storage-setting comparisons.""" + return _BENCHMARK_SEED_BASE + int(num_input_files) + + +def _run_single_benchmark( + tmp_path: Path, + *, + run_kind: str, + num_input_files: int, + target_chunk_mb: float, + compression: str, + compression_level: int, + shuffle: bool, + seed: int, +) -> dict[str, object]: + plan = make_realistic_voxel_benchmark_plan( + num_input_files=num_input_files, + seed=seed, + max_voxels=0, + require_s3_templates=True, + ) + num_items = plan.num_items + output_file = tmp_path / ( + f'{run_kind}_{num_input_files}_{target_chunk_mb}_{compression}_{compression_level}_{int(shuffle)}.h5' + ) + + started = time.perf_counter() + data_generation_seconds = 0.0 + hdf5_write_seconds = 0.0 + with h5py.File(output_file, 'w') as h5f: + create_started = time.perf_counter() + dset = create_empty_scalar_matrix_dataset( + h5f, + dataset_path='scalars/alff/values', + num_subjects=num_input_files, + num_items=num_items, + storage_dtype='float32', + compression=compression, + compression_level=compression_level, + shuffle=shuffle, + target_chunk_mb=target_chunk_mb, + sources_list=[f'sub-{idx:06d}' for idx in range(num_input_files)], + ) + hdf5_write_seconds += time.perf_counter() - create_started + stripe_width = dset.chunks[1] if dset.chunks is not None else max(1, num_items // 8) + stripe_buffer = np.empty((num_input_files, stripe_width), dtype=np.float32) + missing_counts = np.zeros(num_input_files, dtype=np.int64) + + for start in range(0, num_items, stripe_width): + end = min(start + stripe_width, num_items) + view = stripe_buffer[:, : end - start] + gen_started = time.perf_counter() + missing_counts += fill_realistic_voxel_stripe(plan, start=start, end=end, out=view) + data_generation_seconds += 
time.perf_counter() - gen_started + write_started = time.perf_counter() + dset[:, start:end] = view + hdf5_write_seconds += time.perf_counter() - write_started + + chunk_subjects, chunk_items = dset.chunks + elapsed = time.perf_counter() - started + + output_size_bytes = output_file.stat().st_size + output_size_gb = output_size_bytes / (1024.0**3) + output_file.unlink(missing_ok=True) + values_written = float(num_input_files * num_items) + throughput_values_per_second = values_written / elapsed if elapsed > 0 else float('inf') + throughput_mb_per_second = ( + values_written * 4.0 / (1024.0 * 1024.0) / elapsed if elapsed > 0 else float('inf') + ) + missing_fractions = missing_counts / float(num_items) + metadata = finalize_plan_metadata( + plan, + mean_missing_fraction=float(np.mean(missing_fractions)), + std_missing_fraction=float(np.std(missing_fractions)), + ) + + return { + 'timestamp_utc': datetime.now(UTC).isoformat(), + 'run_kind': run_kind, + 'num_input_files': num_input_files, + 'target_chunk_mb': float(target_chunk_mb), + 'compression': compression, + 'compression_level': int(compression_level), + 'shuffle': int(shuffle), + 'dtype': 'float32', + 'num_items': int(num_items), + 'chunk_subjects': int(chunk_subjects), + 'chunk_items': int(chunk_items), + 'elapsed_seconds': float(elapsed), + 'data_generation_seconds': float(data_generation_seconds), + 'hdf5_write_seconds': float(hdf5_write_seconds), + 'output_size_bytes': int(output_size_bytes), + 'output_size_gb': float(output_size_gb), + 'throughput_values_per_second': float(throughput_values_per_second), + 'throughput_mb_per_second': float(throughput_mb_per_second), + 'seed': int(seed), + 'volume_shape': json.dumps(metadata['volume_shape']), + 'group_mask_voxels': int(metadata['group_mask_voxels']), + 'sampled_voxels': int(metadata['sampled_voxels']), + 'noise_std': float(metadata['noise_std']), + 'dropout_range': json.dumps(metadata['dropout_range']), + 'mean_missing_fraction': 
float(metadata['mean_missing_fraction']), + 'std_missing_fraction': float(metadata['std_missing_fraction']), + 'workflow_reference': str(metadata['workflow_reference']), + 'python_version': sys.version.split()[0], + 'h5py_version': h5py.__version__, + 'platform': platform.platform(), + } + + +@pytest.mark.benchmark +@pytest.mark.benchmark_quick +@pytest.mark.parametrize( + ('num_input_files', 'target_chunk_mb', 'compression', 'compression_level', 'shuffle'), + [ + (100, 16.0, 'gzip', 1, True), + (100, 32.0, 'gzip', 1, True), + (100, 32.0, 'gzip', 4, False), + (100, 64.0, 'gzip', 9, True), + (100, 32.0, 'lzf', 0, True), + (100, 64.0, 'lzf', 0, False), + (100, 32.0, 'none', 0, False), + (1000, 16.0, 'gzip', 1, True), + (1000, 32.0, 'gzip', 4, True), + (1000, 64.0, 'gzip', 9, False), + (1000, 32.0, 'lzf', 0, True), + (1000, 32.0, 'none', 0, False), + ], +) +def test_h5_benchmark_quick_subset( + tmp_path, + num_input_files: int, + target_chunk_mb: float, + compression: str, + compression_level: int, + shuffle: bool, +) -> None: + """Fast benchmark subset for interactive inspection.""" + results_dir = _benchmark_results_dir() + csv_path = _results_csv_path(results_dir) + meta_path = _run_meta_path(results_dir) + row = _run_single_benchmark( + tmp_path=tmp_path, + run_kind='quick', + num_input_files=num_input_files, + target_chunk_mb=target_chunk_mb, + compression=compression, + compression_level=compression_level, + shuffle=shuffle, + seed=_benchmark_seed(num_input_files), + ) + _append_csv_row(csv_path, row) + _update_run_meta(meta_path, run_kind='quick', csv_path=csv_path) + + assert row['elapsed_seconds'] > 0 + assert row['output_size_bytes'] > 0 + assert row['output_size_gb'] >= 0 + + +@pytest.mark.benchmark +@pytest.mark.benchmark_medium +@pytest.mark.parametrize('num_input_files', [100, 1000, 10000]) +@pytest.mark.parametrize('target_chunk_mb', [4.0, 8.0, 16.0, 32.0, 64.0]) +@pytest.mark.parametrize('compression', ['gzip', 'lzf', 'none']) 
+@pytest.mark.parametrize('shuffle', [True, False]) +def test_h5_benchmark_medium_sweep( + tmp_path, + num_input_files: int, + target_chunk_mb: float, + compression: str, + shuffle: bool, +) -> None: + """Medium benchmark sweep excluding the largest cohort size.""" + results_dir = _benchmark_results_dir() + csv_path = _results_csv_path(results_dir) + meta_path = _run_meta_path(results_dir) + compression_levels = [1, 4, 9] if compression == 'gzip' else [0] + rows: list[dict[str, object]] = [] + for compression_level in compression_levels: + row = _run_single_benchmark( + tmp_path=tmp_path, + run_kind='medium', + num_input_files=num_input_files, + target_chunk_mb=target_chunk_mb, + compression=compression, + compression_level=compression_level, + shuffle=shuffle, + seed=_benchmark_seed(num_input_files), + ) + rows.append(row) + + assert row['elapsed_seconds'] > 0 + assert row['output_size_bytes'] > 0 + assert row['output_size_gb'] >= 0 + _append_csv_rows(csv_path, rows) + _update_run_meta(meta_path, run_kind='medium', csv_path=csv_path) + + +@pytest.mark.benchmark +@pytest.mark.benchmark_full +@pytest.mark.parametrize('num_input_files', [100, 1000, 10000, 40000]) +@pytest.mark.parametrize('target_chunk_mb', [4.0, 8.0, 16.0, 32.0, 64.0]) +@pytest.mark.parametrize('compression', ['gzip', 'lzf', 'none']) +@pytest.mark.parametrize('shuffle', [True, False]) +def test_h5_benchmark_full_sweep( + tmp_path, + num_input_files: int, + target_chunk_mb: float, + compression: str, + shuffle: bool, +) -> None: + """Full benchmark sweep for publication-grade comparisons.""" + results_dir = _benchmark_results_dir() + csv_path = _results_csv_path(results_dir) + meta_path = _run_meta_path(results_dir) + compression_levels = [1, 4, 9] if compression == 'gzip' else [0] + rows: list[dict[str, object]] = [] + for compression_level in compression_levels: + row = _run_single_benchmark( + tmp_path=tmp_path, + run_kind='full', + num_input_files=num_input_files, + 
target_chunk_mb=target_chunk_mb, + compression=compression, + compression_level=compression_level, + shuffle=shuffle, + seed=_benchmark_seed(num_input_files), + ) + rows.append(row) + + assert row['elapsed_seconds'] > 0 + assert row['output_size_bytes'] > 0 + assert row['output_size_gb'] >= 0 + _append_csv_rows(csv_path, rows) + _update_run_meta(meta_path, run_kind='full', csv_path=csv_path) From c0005f59ca83e0cee96a512ef2e07e95b1d03e13 Mon Sep 17 00:00:00 2001 From: Tien Tong <35613222+tien-tong@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:40:33 -0400 Subject: [PATCH 2/4] fix ruff --- test/benchmark_data_utils.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/test/benchmark_data_utils.py b/test/benchmark_data_utils.py index 832b988..13b3a88 100644 --- a/test/benchmark_data_utils.py +++ b/test/benchmark_data_utils.py @@ -200,9 +200,9 @@ def make_realistic_voxel_benchmark_plan( subject_key_base_u64 = ( np.arange(num_input_files, dtype=np.uint64)[:, np.newaxis] * _SUBJECT_MUL ) ^ seed_u64 - dropout_thresholds_u64 = ( - dropout_probs.astype(np.float64) * _U64_FLOAT_DENOM - ).astype(np.uint64)[:, np.newaxis] + dropout_thresholds_u64 = (dropout_probs.astype(np.float64) * _U64_FLOAT_DENOM).astype( + np.uint64 + )[:, np.newaxis] uses_s3_templates = template_rows.shape[1] > 0 and volume_shape != (16, 16, 8) metadata: dict[str, object] = { @@ -260,7 +260,9 @@ def fill_realistic_voxel_stripe( Destination array with shape (num_subjects, >= stripe_width), dtype float32. 
""" if start < 0 or end <= start or end > plan.num_items: - raise ValueError(f'invalid stripe bounds: start={start}, end={end}, num_items={plan.num_items}') + raise ValueError( + f'invalid stripe bounds: start={start}, end={end}, num_items={plan.num_items}' + ) stripe_width = end - start if out.shape[0] != plan.num_subjects or out.shape[1] < stripe_width: raise ValueError( @@ -294,9 +296,9 @@ def fill_realistic_voxel_stripe( u2 = _u64_to_unit_float(_splitmix64(key_matrix ^ _SALT_NOISE_2)) u1 = np.clip(u1, 1e-12, 1.0) noise = np.sqrt(-2.0 * np.log(u1)) * np.cos(2.0 * np.pi * u2) - scaled_noise = noise.astype(np.float32, copy=False) * (plan.noise_std * sd_slice)[ - np.newaxis, : - ] + scaled_noise = ( + noise.astype(np.float32, copy=False) * (plan.noise_std * sd_slice)[np.newaxis, :] + ) view[valid_mask] = view[valid_mask] + scaled_noise[valid_mask] return np.count_nonzero(np.isnan(view), axis=1) @@ -356,7 +358,8 @@ def make_realistic_voxel_benchmark_dataset( for subject_idx in range(plan.num_subjects): row = ( plan.voxel_mean - + plan.subject_scales[subject_idx] * (plan.sampled_templates[plan.template_indices[subject_idx]] - plan.voxel_mean) + + plan.subject_scales[subject_idx] + * (plan.sampled_templates[plan.template_indices[subject_idx]] - plan.voxel_mean) + plan.subject_offsets[subject_idx] * plan.voxel_sd ).astype(np.float32, copy=False) @@ -364,7 +367,9 @@ def make_realistic_voxel_benchmark_dataset( (np.uint64(plan.seed) << np.uint64(1)) ^ np.uint64(subject_idx + 0xA5A5A5A5) ) row_rng = np.random.default_rng(row_seed) - row[row_rng.random(plan.num_items, dtype=np.float32) < plan.dropout_probs[subject_idx]] = np.nan + row[row_rng.random(plan.num_items, dtype=np.float32) < plan.dropout_probs[subject_idx]] = ( + np.nan + ) valid = np.isfinite(row) if np.any(valid): From b7df5447fb2bba36472c2425fe682429ff9e184f Mon Sep 17 00:00:00 2001 From: Tien Tong <35613222+tien-tong@users.noreply.github.com> Date: Fri, 27 Mar 2026 16:06:45 -0400 Subject: [PATCH 3/4] remove 
matplotlib dependency --- pyproject.toml | 1 - src/modelarrayio/cli/utils.py | 4 ++-- test/test_h5_benchmarks.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 234ad14..d854567 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,6 @@ test = [ "pytest-cov>=5", "pytest-xdist>=3", "pytest-env>=1.0", - "matplotlib>=3.8", ] all = ["modelarrayio[doc,test,s3]"] diff --git a/src/modelarrayio/cli/utils.py b/src/modelarrayio/cli/utils.py index 36918b4..1fba228 100644 --- a/src/modelarrayio/cli/utils.py +++ b/src/modelarrayio/cli/utils.py @@ -170,14 +170,14 @@ def read_result_names( def _decode_names(values: object) -> list[str]: if isinstance(values, np.ndarray): sequence = values.tolist() - elif isinstance(values, (list, tuple)): + elif isinstance(values, list | tuple): sequence = list(values) else: sequence = [values] decoded: list[str] = [] for value in sequence: - if isinstance(value, (bytes, bytearray, np.bytes_)): + if isinstance(value, bytes | bytearray | np.bytes_): text = value.decode('utf-8', errors='ignore') else: text = str(value) diff --git a/test/test_h5_benchmarks.py b/test/test_h5_benchmarks.py index bd6d5d9..1a65a3d 100644 --- a/test/test_h5_benchmarks.py +++ b/test/test_h5_benchmarks.py @@ -74,7 +74,7 @@ def _benchmark_results_dir() -> Path: def _xdist_worker_id() -> str | None: worker = os.environ.get('PYTEST_XDIST_WORKER') - return worker if worker else None + return worker or None def _results_csv_path(results_dir: Path) -> Path: From fb63ee2f7bfee5f06b0331cfb1e09efa6a1e911a Mon Sep 17 00:00:00 2001 From: Tien Tong <35613222+tien-tong@users.noreply.github.com> Date: Fri, 27 Mar 2026 16:16:52 -0400 Subject: [PATCH 4/4] remove benchmark from CI --- tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index e422b2f..5de7b2c 100644 --- a/tox.ini +++ b/tox.ini @@ -20,7 +20,7 @@ DEPENDS = latest: latest [testenv] -description = Pytest with coverage 
(excludes network S3 tests) +description = Pytest with coverage (excludes network S3 and benchmark tests) labels = test setenv = COVERAGE_FILE = {toxinidir}/.tox/.coverage.{envname} @@ -45,7 +45,7 @@ uv_resolution = min: lowest-direct commands = - pytest -m "not s3" --cov=modelarrayio --cov-config={toxinidir}/pyproject.toml --cov-report=term-missing --cov-report=xml {posargs:test} + pytest -m "not s3 and not benchmark" --cov=modelarrayio --cov-config={toxinidir}/pyproject.toml --cov-report=term-missing --cov-report=xml {posargs:test} [testenv:lint] runner = uv-venv-lock-runner