From 8f6a3f906c5f301aa775b12fdc884b159efdb7c3 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Mon, 27 Apr 2026 23:14:47 -0700
Subject: [PATCH] Formatting

---
 NAM/get_dsp.cpp             |   6 +-
 NAM/wavenet/a2_fast.cpp     | 147 ++++++++++++++++++--------------------
 NAM/wavenet/a2_fast.h       |   8 +-
 NAM/wavenet/model.cpp       |   2 +-
 NAM/wavenet/slimmable.cpp   |   3 +-
 NAM/wavenet/slimmable.h     |   2 +-
 tools/bench_a2_fast.cpp     |  40 +++++-----
 tools/test/test_a2_fast.cpp |  41 +++++-----
 tools/test/test_get_dsp.cpp |   9 +--
 9 files changed, 119 insertions(+), 139 deletions(-)

diff --git a/NAM/get_dsp.cpp b/NAM/get_dsp.cpp
index 423e8142..3aa85924 100644
--- a/NAM/get_dsp.cpp
+++ b/NAM/get_dsp.cpp
@@ -42,7 +42,7 @@ class CoreVersionSupportChecker : public IVersionSupportChecker
 std::vector<std::shared_ptr<IVersionSupportChecker>>& version_support_registry()
 {
   static std::vector<std::shared_ptr<IVersionSupportChecker>> registry{
-    std::make_shared<CoreVersionSupportChecker>()};
+    std::make_shared<CoreVersionSupportChecker>()};
   return registry;
 }

@@ -123,8 +123,8 @@ void verify_config_version(const std::string versionStr)
   if (support == Supported::PARTIAL)
   {
     std::stringstream ss;
-    std::cerr << "Model config is a partially-supported version " << versionStr
-              << ". Continuing with partial support." << std::endl;
+    std::cerr << "Model config is a partially-supported version " << versionStr << ". Continuing with partial support."
+              << std::endl;
   }
 }

diff --git a/NAM/wavenet/a2_fast.cpp b/NAM/wavenet/a2_fast.cpp
index 321180e7..276ed1a6 100644
--- a/NAM/wavenet/a2_fast.cpp
+++ b/NAM/wavenet/a2_fast.cpp
@@ -1,31 +1,31 @@
 #if defined(NAM_ENABLE_A2_FAST)

-// Ring-buffer strategy:
-// 0 = linear memmove-rewind (variable worst-case latency, sporadic spikes)
-// 1 = pow2 + tail mirror (constant per-block work, branchless reads)
-// Controlled externally with -DNAM_A2_RING_MODE=0 for head-to-head comparison.
-#ifndef NAM_A2_RING_MODE
-#define NAM_A2_RING_MODE 1
-#endif
-
-#include "a2_fast.h"
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-
-#include "../dsp.h"
+  // Ring-buffer strategy:
+  // 0 = linear memmove-rewind (variable worst-case latency, sporadic spikes)
+  // 1 = pow2 + tail mirror (constant per-block work, branchless reads)
+  // Controlled externally with -DNAM_A2_RING_MODE=0 for head-to-head comparison.
+  #ifndef NAM_A2_RING_MODE
+  #define NAM_A2_RING_MODE 1
+  #endif
+
+  #include "a2_fast.h"
+
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+
+  #include
+
+  #include "../dsp.h"

 namespace nam
 {
@@ -92,7 +92,7 @@ class A2FastModel : public DSP
   // Conv1D input history ring buffer, column-major (Channels rows).
   std::vector<float> history;

-#if NAM_A2_RING_MODE == 1
+  #if NAM_A2_RING_MODE == 1
   // pow2 ring + tail mirror. Storage = (pow2_size + max_buffer_size) cols.
   // write_pos is kept in [0, pow2_size), reads use (pos & pow2_mask) and are
   // always contiguous because cols [pow2_size, pow2_size + max_buffer_size)
   // mirror cols [0, max_buffer_size).
   int pow2_size = 0;
   int pow2_mask = 0;
   int write_pos = 0;
-#else
+  #else
   // Linear ring with sporadic memmove-rewind. history_cols = 2*max_lookback +
   // max_buffer_size; write_pos grows monotonically until rewind fires.
   int history_cols = 0;
   int write_pos = 0;
-#endif
+  #endif
 };

 std::array _layers;
@@ -124,20 +124,20 @@
   // Head ring buffer (Channels rows, col-major). Same ring layout as per-layer.
   std::vector<float> _head_history;
-#if NAM_A2_RING_MODE == 1
+  #if NAM_A2_RING_MODE == 1
   int _head_pow2_size = 0;
   int _head_pow2_mask = 0;
   int _head_write_pos = 0;
-#else
+  #else
   int _head_history_cols = 0;
   int _head_write_pos = 0;
-#endif
+  #endif

   // Working buffers (all Channels rows, max_buffer_size cols, col-major).
   std::vector<float> _layer_in; // current layer input / next layer input (in-place residual)
   std::vector<float> _head_sum; // accumulates activations across all layers
-  std::vector<float> _z;    // per-layer conv output accumulator (tap-major)
-  std::vector<float> _cond; // float32 copy of the double NAM_SAMPLE input, reused each block
+  std::vector<float> _z; // per-layer conv output accumulator (tap-major)
+  std::vector<float> _cond; // float32 copy of the double NAM_SAMPLE input, reused each block
   std::vector<float> _head_out; // float32 head output before writing to NAM_SAMPLE

   int _prewarm_samples = 0;
@@ -300,29 +300,29 @@ void A2FastModel<Channels>::SetMaxBufferSize(int maxBufferSize)

   for (auto& L : _layers)
   {
-#if NAM_A2_RING_MODE == 1
+  #if NAM_A2_RING_MODE == 1
     L.pow2_size = next_pow2(L.max_lookback + maxBufferSize);
     L.pow2_mask = L.pow2_size - 1;
     L.history.assign(static_cast<size_t>(Channels) * (L.pow2_size + maxBufferSize), 0.0f);
     L.write_pos = L.max_lookback;
-#else
+  #else
     L.history_cols = 2 * L.max_lookback + maxBufferSize;
     L.history.assign(static_cast<size_t>(Channels) * L.history_cols, 0.0f);
     L.write_pos = L.max_lookback;
-#endif
+  #endif
   }

   const int head_lookback = kHeadKernelSize - 1;
-#if NAM_A2_RING_MODE == 1
+  #if NAM_A2_RING_MODE == 1
   _head_pow2_size = next_pow2(head_lookback + maxBufferSize);
   _head_pow2_mask = _head_pow2_size - 1;
   _head_history.assign(static_cast<size_t>(Channels) * (_head_pow2_size + maxBufferSize), 0.0f);
   _head_write_pos = head_lookback;
-#else
+  #else
   _head_history_cols = 2 * head_lookback + maxBufferSize;
   _head_history.assign(static_cast<size_t>(Channels) * _head_history_cols, 0.0f);
   _head_write_pos = head_lookback;
-#endif
+  #endif
 }

 // -----------------------------------------------------------------------------
@@ -336,23 +336,22 @@ void A2FastModel<Channels>::SetMaxBufferSize(int maxBufferSize)
 template <int Channels>
 void A2FastModel<Channels>::_ring_write(Layer& L, int num_frames)
 {
-#if NAM_A2_RING_MODE == 1
+  #if NAM_A2_RING_MODE == 1
   const int mbs = GetMaxBufferSize();
   float* const hist = L.history.data();
   const float* const src = _layer_in.data();
   const int wp = L.write_pos;
   const int first = std::min(num_frames, L.pow2_size - wp);
-  std::memcpy(hist + static_cast<size_t>(wp) * Channels, src,
-              static_cast<size_t>(first) * Channels * sizeof(float));
+  std::memcpy(hist + static_cast<size_t>(wp) * Channels, src, static_cast<size_t>(first) * Channels * sizeof(float));
   if (first < num_frames)
   {
     std::memcpy(hist, src + static_cast<size_t>(first) * Channels,
                 static_cast<size_t>(num_frames - first) * Channels * sizeof(float));
   }
-  std::memcpy(hist + static_cast<size_t>(L.pow2_size) * Channels, hist,
-              static_cast<size_t>(mbs) * Channels * sizeof(float));
+  std::memcpy(
+    hist + static_cast<size_t>(L.pow2_size) * Channels, hist, static_cast<size_t>(mbs) * Channels * sizeof(float));
   L.write_pos = (wp + num_frames) & L.pow2_mask;
-#else
+  #else
   if (L.write_pos + num_frames > L.history_cols)
   {
     const int keep = L.max_lookback;
@@ -363,29 +362,28 @@ void A2FastModel<Channels>::_ring_write(Layer& L, int num_frames)
   std::memcpy(L.history.data() + static_cast<size_t>(L.write_pos) * Channels, _layer_in.data(),
               static_cast<size_t>(num_frames) * Channels * sizeof(float));
   L.write_pos += num_frames;
-#endif
+  #endif
 }

 template <int Channels>
 void A2FastModel<Channels>::_head_ring_write(int num_frames)
 {
-#if NAM_A2_RING_MODE == 1
+  #if NAM_A2_RING_MODE == 1
   const int mbs = GetMaxBufferSize();
   float* const hist = _head_history.data();
   const float* const src = _head_sum.data();
   const int wp = _head_write_pos;
   const int first = std::min(num_frames, _head_pow2_size - wp);
-  std::memcpy(hist + static_cast<size_t>(wp) * Channels, src,
-              static_cast<size_t>(first) * Channels * sizeof(float));
+  std::memcpy(hist + static_cast<size_t>(wp) * Channels, src, static_cast<size_t>(first) * Channels * sizeof(float));
   if (first < num_frames)
   {
     std::memcpy(hist, src + static_cast<size_t>(first) * Channels,
                 static_cast<size_t>(num_frames - first) * Channels * sizeof(float));
   }
-  std::memcpy(hist + static_cast<size_t>(_head_pow2_size) * Channels, hist,
-              static_cast<size_t>(mbs) * Channels * sizeof(float));
+  std::memcpy(
+    hist + static_cast<size_t>(_head_pow2_size) * Channels, hist, static_cast<size_t>(mbs) * Channels * sizeof(float));
   _head_write_pos = (wp + num_frames) & _head_pow2_mask;
-#else
+  #else
   const int keep = kHeadKernelSize - 1;
   if (_head_write_pos + num_frames > _head_history_cols)
   {
@@ -396,7 +394,7 @@ void A2FastModel<Channels>::_head_ring_write(int num_frames)
   std::memcpy(_head_history.data() + static_cast<size_t>(_head_write_pos) * Channels, _head_sum.data(),
               static_cast<size_t>(num_frames) * Channels * sizeof(float));
   _head_write_pos += num_frames;
-#endif
+  #endif
 }

 // -----------------------------------------------------------------------------
@@ -418,15 +416,13 @@ void A2FastModel<Channels>::_layer_forward_k(Layer& L, const float* cond, int num_frames)
   // D` samples into the past. In pow2 mode the position is wrapped by mask and
   // reads spanning the wrap land in the tail mirror; in linear mode write_pos
   // is monotonic and arithmetic is plain.
-#if NAM_A2_RING_MODE == 1
+  #if NAM_A2_RING_MODE == 1
   const int mask = L.pow2_mask;
-  auto tap_base_phys = [&](int taps_back) {
-    return (L.write_pos - num_frames - taps_back * D) & mask;
-  };
+  auto tap_base_phys = [&](int taps_back) { return (L.write_pos - num_frames - taps_back * D) & mask; };
-#else
+  #else
   const int base = L.write_pos - num_frames;
   auto tap_base_phys = [&](int taps_back) { return base - taps_back * D; };
-#endif
+  #endif

   // Two conv strategies, dispatched at compile time on Channels:
   //
@@ -598,10 +594,10 @@
       // Post-conv: bias, mixin, LeakyReLU, head_sum, 1x1 residual — all block ops.
       ztile.colwise() += conv_b_vec;
-      ztile.noalias() += mixin_vec * cond_row;  // rank-1 outer product
+      ztile.noalias() += mixin_vec * cond_row; // rank-1 outer product
       ztile = (ztile.array() < 0.0f).select(ztile.array() * kLeakySlope, ztile.array());
       hsum_block += ztile;
-      lin_block.noalias() += l1x1_mat * ztile;  // 8x8 × 8xN GEMM
+      lin_block.noalias() += l1x1_mat * ztile; // 8x8 × 8xN GEMM
       lin_block.colwise() += l1x1_b_vec;
     }
 }

@@ -616,15 +612,9 @@ void A2FastModel<Channels>::_layer_forward(int layer_idx, const float* cond, int num_frames)
   _ring_write(L, num_frames);
   switch (L.kernel_size)
   {
-    case 6:
-      _layer_forward_k<6>(L, cond, num_frames);
-      break;
-    case 15:
-      _layer_forward_k<15>(L, cond, num_frames);
-      break;
-    default:
-      throw std::runtime_error("A2FastModel: unexpected kernel_size "
-                               + std::to_string(L.kernel_size));
+    case 6: _layer_forward_k<6>(L, cond, num_frames); break;
+    case 15: _layer_forward_k<15>(L, cond, num_frames); break;
+    default: throw std::runtime_error("A2FastModel: unexpected kernel_size " + std::to_string(L.kernel_size));
   }
 }

@@ -635,15 +625,13 @@
 template <int Channels>
 void A2FastModel<Channels>::_head_forward(float* output, int num_frames)
 {
   _head_ring_write(num_frames);
-#if NAM_A2_RING_MODE == 1
+  #if NAM_A2_RING_MODE == 1
   const int mask = _head_pow2_mask;
-  auto col_of = [&](int f, int k) {
-    return (_head_write_pos - num_frames + f - (kHeadKernelSize - 1 - k)) & mask;
-  };
+  auto col_of = [&](int f, int k) { return (_head_write_pos - num_frames + f - (kHeadKernelSize - 1 - k)) & mask; };
-#else
+  #else
   const int base = _head_write_pos - num_frames;
   auto col_of = [&](int f, int k) { return base + f - (kHeadKernelSize - 1 - k); };
-#endif
+  #endif

   for (int f = 0; f < num_frames; f++)
   {
@@ -874,9 +862,8 @@ bool is_a2_shape(const nlohmann::json& config, int* channels)
     return false;

   // No FiLM anywhere
-  for (const char* key :
-       {"conv_pre_film", "conv_post_film", "input_mixin_pre_film", "input_mixin_post_film", "activation_pre_film",
-        "activation_post_film", "layer1x1_post_film", "head1x1_post_film"})
+  for (const char* key : {"conv_pre_film", "conv_post_film", "input_mixin_pre_film", "input_mixin_post_film",
+                          "activation_pre_film", "activation_post_film", "layer1x1_post_film", "head1x1_post_film"})
   {
     if (!film_inactive(la, key))
       return false;

diff --git a/NAM/wavenet/a2_fast.h b/NAM/wavenet/a2_fast.h
index f5963f38..03cf439d 100644
--- a/NAM/wavenet/a2_fast.h
+++ b/NAM/wavenet/a2_fast.h
@@ -14,11 +14,11 @@

 #if defined(NAM_ENABLE_A2_FAST)

-#include
-#include
+  #include
+  #include

-#include "../model_config.h"
-#include "json.hpp"
+  #include "../model_config.h"
+  #include "json.hpp"

 namespace nam
 {

diff --git a/NAM/wavenet/model.cpp b/NAM/wavenet/model.cpp
index 00d5a539..eaf74add 100644
--- a/NAM/wavenet/model.cpp
+++ b/NAM/wavenet/model.cpp
@@ -13,7 +13,7 @@
 #include "model.h"

 #if defined(NAM_ENABLE_A2_FAST)
-#include "a2_fast.h"
+  #include "a2_fast.h"
 #endif

 // detail::Head (WaveNet post-stack head) =====================================

diff --git a/NAM/wavenet/slimmable.cpp b/NAM/wavenet/slimmable.cpp
index c397914e..19b019e2 100644
--- a/NAM/wavenet/slimmable.cpp
+++ b/NAM/wavenet/slimmable.cpp
@@ -309,8 +309,7 @@ void SlimmableWavenet::_pending_store_release(std::shared_ptr p

 std::shared_ptr SlimmableWavenet::_pending_exchange_take_acq_rel()
 {
-  return std::atomic_exchange_explicit(
-    &_pending_staged, std::shared_ptr{}, std::memory_order_acq_rel);
+  return std::atomic_exchange_explicit(&_pending_staged, std::shared_ptr{}, std::memory_order_acq_rel);
 }
 #else
 void SlimmableWavenet::_pending_clear_release()

diff --git a/NAM/wavenet/slimmable.h b/NAM/wavenet/slimmable.h
index 57968a4c..f0687c09 100644
--- a/NAM/wavenet/slimmable.h
+++ b/NAM/wavenet/slimmable.h
@@ -6,7 +6,7 @@

 #ifdef _LIBCPP_VERSION
 // libc++: std::atomic<std::shared_ptr> is not viable; staging uses deprecated atomic_* free functions.
 #else
-#include
+  #include
 #endif

 #include "../dsp.h"

diff --git a/tools/bench_a2_fast.cpp b/tools/bench_a2_fast.cpp
index 4b2564d1..2dcaff1e 100644
--- a/tools/bench_a2_fast.cpp
+++ b/tools/bench_a2_fast.cpp
@@ -12,26 +12,26 @@

 #if defined(NAM_ENABLE_A2_FAST)

-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include

-#include "json.hpp"
+  #include "json.hpp"

-#include "NAM/dsp.h"
-#include "NAM/wavenet/a2_fast.h"
-#include "NAM/wavenet/model.h"
+  #include "NAM/dsp.h"
+  #include "NAM/wavenet/a2_fast.h"
+  #include "NAM/wavenet/model.h"

 using clock_t_hr = std::chrono::high_resolution_clock;
 using ns = std::chrono::nanoseconds;

@@ -220,8 +220,8 @@ void bench_model(const LoadedModel& m, const Options& o)
   auto fmt_us = [](double ms) { return ms * 1000.0; };
   std::cout << "\n== " << m.path << " (" << arch << ", Channels=" << channels << ") ==\n";
   std::cout << "  audio=" << std::fixed << std::setprecision(0) << (1000.0 * total / m.sample_rate) << "ms @ "
-            << static_cast<int>(m.sample_rate) << "Hz, block=" << o.buffer_size << ", iters=" << o.iterations
-            << " (" << fast_s.n << " blocks timed each)\n";
+            << static_cast<int>(m.sample_rate) << "Hz, block=" << o.buffer_size << ", iters=" << o.iterations << " ("
+            << fast_s.n << " blocks timed each)\n";
   std::cout << "  per-block audio deadline: " << std::fixed << std::setprecision(1) << block_audio_us << " us\n";
   std::cout << std::fixed << std::setprecision(2);
   std::cout << "  min      p50      p99      p99.9    max      mean\n";

diff --git a/tools/test/test_a2_fast.cpp b/tools/test/test_a2_fast.cpp
index 8bd2d1f5..87c07030 100644
--- a/tools/test/test_a2_fast.cpp
+++ b/tools/test/test_a2_fast.cpp
@@ -6,24 +6,24 @@

 #if defined(NAM_ENABLE_A2_FAST)

-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include

-#include "json.hpp"
+  #include "json.hpp"

-#include "NAM/dsp.h"
-#include "NAM/wavenet/a2_fast.h"
-#include "NAM/wavenet/model.h"
+  #include "NAM/dsp.h"
+  #include "NAM/wavenet/a2_fast.h"
+  #include "NAM/wavenet/model.h"

-#include "allocation_tracking.h"
+  #include "allocation_tracking.h"

 namespace test_a2_fast
 {

@@ -98,7 +98,7 @@ int a2_weight_count(int channels)
   {
     const int K = nam::wavenet::a2_fast::kKernelSizes[i];
     total += bn * channels * K + bn; // conv1d weights + bias
-    total += bn;                     // input mixin (no bias)
+    total += bn; // input mixin (no bias)
     total += channels * bn + channels; // layer1x1 + bias
   }
   total += channels * nam::wavenet::a2_fast::kHeadKernelSize + 1; // head rechannel + bias

@@ -123,8 +123,7 @@ std::vector<NAM_SAMPLE> make_test_input(int num_frames, double sample_rate)
   for (int i = 0; i < num_frames; i++)
   {
     const double t = static_cast<double>(i) / sample_rate;
-    in[i] = static_cast<NAM_SAMPLE>(0.25 * std::sin(2.0 * M_PI * 220.0 * t)
-                                    + 0.10 * std::sin(2.0 * M_PI * 1230.0 * t));
+    in[i] = static_cast<NAM_SAMPLE>(0.25 * std::sin(2.0 * M_PI * 220.0 * t) + 0.10 * std::sin(2.0 * M_PI * 1230.0 * t));
   }
   return in;
 }

@@ -296,8 +295,7 @@ void test_process_realtime_safe(int channels)

   for (int block : {1, 32, 64, 128, 256})
   {
-    std::string test_name = "A2FastModel<" + std::to_string(channels) + ">::process block="
-                            + std::to_string(block);
+    std::string test_name = "A2FastModel<" + std::to_string(channels) + ">::process block=" + std::to_string(block);
     allocation_tracking::run_allocation_test_no_allocations(
       nullptr,
       [&]() {
@@ -312,8 +310,7 @@ void test_process_realtime_safe(int channels)
           pos += block;
         }
       },
-      nullptr,
-      test_name.c_str());
+      nullptr, test_name.c_str());
   }
 }

diff --git a/tools/test/test_get_dsp.cpp b/tools/test/test_get_dsp.cpp
index 36aee83d..150f50fe 100644
--- a/tools/test/test_get_dsp.cpp
+++ b/tools/test/test_get_dsp.cpp
@@ -253,19 +253,16 @@ void test_version_too_early()

 void test_is_version_supported_core_behavior()
 {
-  assert(nam::is_version_supported(nam::LATEST_FULLY_SUPPORTED_NAM_FILE_VERSION)
-         == nam::Supported::YES);
+  assert(nam::is_version_supported(nam::LATEST_FULLY_SUPPORTED_NAM_FILE_VERSION) == nam::Supported::YES);

   nam::Version patchBeyondLatest = nam::ParseVersion(nam::LATEST_FULLY_SUPPORTED_NAM_FILE_VERSION);
   patchBeyondLatest.patch++;
-  assert(nam::is_version_supported(patchBeyondLatest.toString())
-         == nam::Supported::PARTIAL);
+  assert(nam::is_version_supported(patchBeyondLatest.toString()) == nam::Supported::PARTIAL);

   nam::Version minorBeyondLatest = nam::ParseVersion(nam::LATEST_FULLY_SUPPORTED_NAM_FILE_VERSION);
   minorBeyondLatest.minor++;
   minorBeyondLatest.patch = 0;
-  assert(nam::is_version_supported(minorBeyondLatest.toString())
-         == nam::Supported::NO);
+  assert(nam::is_version_supported(minorBeyondLatest.toString()) == nam::Supported::NO);
 }

 void test_register_custom_version_support_checker()
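
Reviewer note on the two ring strategies this patch reformats: the invariant is easiest to see in isolation. Below is a minimal, single-channel sketch of the NAM_A2_RING_MODE == 1 layout (pow2 ring + tail mirror). The names Ring, push_block, and tap_ptr are illustrative, not identifiers from this repository, and the real code stores column-major frames of Channels floats per position rather than single samples.

// Single-channel sketch of the pow2 + tail-mirror ring (mode 1). Illustrative
// only; assumes float samples and one channel.
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <vector>

struct Ring
{
  std::vector<float> buf; // pow2_size + max_block floats; the tail mirrors the head
  int pow2_size = 0;
  int mask = 0;
  int max_block = 0;
  int write_pos = 0;

  static int next_pow2(int n)
  {
    int p = 1;
    while (p < n)
      p <<= 1;
    return p;
  }

  Ring(int lookback, int maxBlock)
  : max_block(maxBlock)
  {
    pow2_size = next_pow2(lookback + maxBlock);
    mask = pow2_size - 1;
    buf.assign(static_cast<size_t>(pow2_size + maxBlock), 0.0f);
    write_pos = lookback;
  }

  // Write a block (possibly wrapping), then refresh the tail mirror so any
  // read of up to max_block contiguous samples can ignore the wrap entirely.
  void push_block(const float* src, int n)
  {
    assert(n <= max_block);
    const int first = std::min(n, pow2_size - write_pos);
    std::memcpy(buf.data() + write_pos, src, sizeof(float) * first);
    if (first < n)
      std::memcpy(buf.data(), src + first, sizeof(float) * (n - first));
    // Mirror positions [0, max_block) into [pow2_size, pow2_size + max_block).
    std::memcpy(buf.data() + pow2_size, buf.data(), sizeof(float) * max_block);
    write_pos = (write_pos + n) & mask;
  }

  // Start of the n-sample window that ends `delay` frames before the newest
  // sample; the masked index is in [0, pow2_size), and a read that runs past
  // the pow2 boundary lands in the fresh tail mirror, so it never branches.
  const float* tap_ptr(int n, int delay) const { return buf.data() + ((write_pos - n - delay) & mask); }
};

int main()
{
  Ring r(/*lookback=*/64, /*maxBlock=*/16);
  std::vector<float> block(16);
  for (int b = 0; b < 8; b++)
  {
    for (int i = 0; i < 16; i++)
      block[i] = static_cast<float>(b * 16 + i);
    r.push_block(block.data(), 16);
  }
  // The 16-sample window ending 32 frames back (e.g. one dilated tap).
  const float* tap = r.tap_ptr(16, 32);
  std::printf("%g .. %g\n", tap[0], tap[15]); // prints: 80 .. 95
  return 0;
}

The mirror trades max_block extra columns of storage and one extra memcpy per block for reads that never branch on wraparound; that is what lets tap_base_phys and col_of in the patched code stay a single subtract-and-mask per tap.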
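For contrast, a companion sketch of NAM_A2_RING_MODE == 0, the linear ring with memmove-rewind. The rewind branch is the source of the "sporadic spikes" the mode comment mentions: most blocks are a single memcpy, but occasionally a memmove of the retained history fires. LinearRing is likewise an illustrative name, and the rewind body here is a plausible reconstruction (the patch's hunks end just before those unchanged lines).

// Single-channel sketch of the linear memmove-rewind ring (mode 0).
#include <cstring>
#include <vector>

struct LinearRing
{
  std::vector<float> buf;
  int cols = 0;
  int lookback = 0;
  int write_pos = 0;

  LinearRing(int lookback_, int maxBlock)
  : lookback(lookback_)
  {
    cols = 2 * lookback_ + maxBlock; // room for history plus a full block
    buf.assign(static_cast<size_t>(cols), 0.0f);
    write_pos = lookback_;
  }

  void push_block(const float* src, int n)
  {
    if (write_pos + n > cols) // rewind: slide the needed history to the front
    {
      std::memmove(buf.data(), buf.data() + (write_pos - lookback), sizeof(float) * lookback);
      write_pos = lookback;
    }
    std::memcpy(buf.data() + write_pos, src, sizeof(float) * n);
    write_pos += n;
  }

  // Window of n samples ending `delay` frames before the newest sample; valid
  // for delay <= lookback because at least lookback samples always precede
  // write_pos, even immediately after a rewind.
  const float* tap_ptr(int n, int delay) const { return buf.data() + (write_pos - n - delay); }
};

int main()
{
  LinearRing r(/*lookback=*/64, /*maxBlock=*/16);
  std::vector<float> block(16, 1.0f);
  for (int b = 0; b < 12; b++)
    r.push_block(block.data(), 16); // the rewind fires once the linear region is exhausted
  return 0;
}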