-
Notifications
You must be signed in to change notification settings - Fork 3
feat: add memory profiling support #39
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
acce038
68e21b6
dfadbfd
1b97510
5292441
c6e8f77
003a1bf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| +1 −0 | .github/workflows/ci.yml | |
| +5 −2 | Justfile | |
| +3,280 −2,736 | dist/core.c | |
| +0 −13 | src/c.zig | |
| +6 −0 | src/instruments/analysis.zig | |
| +61 −0 | src/instruments/fifo_instrument.zig | |
| +3 −155 | src/instruments/perf.zig | |
| +25 −14 | src/instruments/root.zig | |
| +5 −1 | src/instruments/valgrind.zig | |
| +199 −0 | src/runner_fifo.zig | |
| +20 −0 | src/shared.zig | |
| +52 −20 | src/tests/deserialize_rust/create_serialized.rs | |
| +29 −0 | src/tests/deserialize_rust/rust_deser.zig | |
| +138 −0 | src/tests/deserialize_rust/serialized.zig |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| ../google_benchmark_cmake/memory_bench.hpp |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,197 @@ | ||
| #pragma once | ||
|
|
||
#include <benchmark/benchmark.h>

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <string>
#include <vector>
|
|
||
// Run-length encoding: each maximal run of one repeated character becomes
// "<decimal run length><character>".
// Example: "aaabbbccc" -> "3a3b3c"; an empty input yields an empty string.
//
// NOTE: Deliberately allocation-heavy. The output is never reserved and every
// run is built through two temporary strings, so the function produces many
// allocations.
// NOTE: Digits in the input are emitted verbatim after the count (e.g. "111"
// -> "31"), so such output is ambiguous to a decoder that reads leading digits
// as the count.
static std::string rle_encode(const std::string& input) {
  if (input.empty()) {
    return "";
  }

  std::string encoded;  // Left unreserved on purpose: growth reallocates.

  // Appends one finished run, materialising the intermediate strings (count
  // text, then count text + symbol) before concatenating onto the output.
  const auto emit_run = [&encoded](size_t length, char symbol) {
    std::string length_text = std::to_string(length);
    std::string piece = length_text + symbol;
    encoded += piece;
  };

  char symbol = input[0];
  size_t length = 1;

  for (size_t pos = 1; pos < input.size(); ++pos) {
    const char next = input[pos];
    if (next != symbol) {
      // The run ended: flush it and start counting the new character.
      emit_run(length, symbol);
      symbol = next;
      length = 1;
      continue;
    }
    ++length;
  }

  // The last run is still pending once the scan finishes.
  emit_run(length, symbol);

  return encoded;
}
|
|
||
// Run-length decoding: expands a sequence of "<decimal count><character>"
// pairs back into runs.
// Example: "3a3b3c" -> "aaabbbccc"
//
// Edge cases, as implemented:
//  - a character with no digits in front of it has a count of 0 and
//    contributes nothing to the output;
//  - digits at the very end of the input with no following character are
//    ignored;
//  - the count is accumulated in a size_t without overflow checking.
static std::string rle_decode(const std::string& input) {
  std::string result;
  size_t i = 0;

  while (i < input.size()) {
    // Parse the count. std::isdigit has undefined behaviour for negative
    // arguments other than EOF, so a plain char (which may be signed and hold
    // a byte >= 0x80) must be converted through unsigned char first.
    size_t count = 0;
    while (i < input.size() &&
           std::isdigit(static_cast<unsigned char>(input[i])) != 0) {
      count = count * 10 + static_cast<size_t>(input[i] - '0');
      ++i;
    }

    // Emit the run for the character that follows the count, if any.
    if (i < input.size()) {
      result.append(count, input[i]);
      ++i;
    }
  }

  return result;
}
|
|
||
// Generates a string of exactly `size` characters made of consecutive runs,
// cycling through the lowercase alphabet ('a', 'b', ..., 'z', 'a', ...).
// Every run is `run_length` characters long except possibly the last one,
// which is truncated so the total length is `size`.
// Example: generate_rle_input(7, 3) -> "aaabbbc"
//
// A `run_length` of 0 is treated as 1; taken literally it would never add a
// character and the loop below would not terminate.
static std::string generate_rle_input(size_t size, size_t run_length) {
  std::string result;
  result.reserve(size);

  const std::string chars = "abcdefghijklmnopqrstuvwxyz";
  const size_t effective_run = std::max<size_t>(run_length, 1);
  size_t char_idx = 0;

  while (result.size() < size) {
    // Clip the final run so the output never exceeds the requested size.
    const size_t count = std::min(effective_run, size - result.size());
    result.append(count, chars[char_idx % chars.size()]);
    ++char_idx;
  }

  return result;
}
|
|
||
| // Benchmark: RLE encoding with small runs (high compression) | ||
| static void BM_RLE_Encode_SmallRuns(benchmark::State& state) { | ||
| const size_t input_size = state.range(0); | ||
| std::string input = generate_rle_input(input_size, 3); | ||
|
|
||
| for (auto _ : state) { | ||
| std::string encoded = rle_encode(input); | ||
| benchmark::DoNotOptimize(encoded); | ||
| benchmark::ClobberMemory(); | ||
| } | ||
|
|
||
| state.SetBytesProcessed(state.iterations() * input_size); | ||
| } | ||
| BENCHMARK(BM_RLE_Encode_SmallRuns) | ||
| ->Arg(100) | ||
| ->Arg(1000) | ||
| ->Arg(10000) | ||
| ->Arg(100000); | ||
|
|
||
| // Benchmark: RLE encoding with large runs (low compression) | ||
| static void BM_RLE_Encode_LargeRuns(benchmark::State& state) { | ||
| const size_t input_size = state.range(0); | ||
| std::string input = generate_rle_input(input_size, 100); | ||
|
|
||
| for (auto _ : state) { | ||
| std::string encoded = rle_encode(input); | ||
| benchmark::DoNotOptimize(encoded); | ||
| benchmark::ClobberMemory(); | ||
| } | ||
|
|
||
| state.SetBytesProcessed(state.iterations() * input_size); | ||
| } | ||
| BENCHMARK(BM_RLE_Encode_LargeRuns) | ||
| ->Arg(100) | ||
| ->Arg(1000) | ||
| ->Arg(10000) | ||
| ->Arg(100000); | ||
|
|
||
| // Benchmark: RLE decoding | ||
| static void BM_RLE_Decode(benchmark::State& state) { | ||
| const size_t input_size = state.range(0); | ||
| std::string input = generate_rle_input(input_size, 10); | ||
| std::string encoded = rle_encode(input); | ||
|
|
||
| for (auto _ : state) { | ||
| std::string decoded = rle_decode(encoded); | ||
| benchmark::DoNotOptimize(decoded); | ||
| benchmark::ClobberMemory(); | ||
| } | ||
|
|
||
| state.SetBytesProcessed(state.iterations() * encoded.size()); | ||
| } | ||
| BENCHMARK(BM_RLE_Decode)->Arg(100)->Arg(1000)->Arg(10000)->Arg(100000); | ||
|
|
||
| // Benchmark: Vector allocations (resizing pattern) | ||
| static void BM_Vector_PushBack(benchmark::State& state) { | ||
| const size_t count = state.range(0); | ||
|
|
||
| for (auto _ : state) { | ||
| std::vector<int> vec; | ||
| for (size_t i = 0; i < count; ++i) { | ||
| vec.push_back(static_cast<int>(i)); | ||
| } | ||
| benchmark::DoNotOptimize(vec); | ||
| benchmark::ClobberMemory(); | ||
| } | ||
| } | ||
| BENCHMARK(BM_Vector_PushBack)->Arg(10)->Arg(100)->Arg(1000)->Arg(10000); | ||
|
|
||
| // Benchmark: Vector allocations with reserve (optimized) | ||
| static void BM_Vector_Reserve(benchmark::State& state) { | ||
| const size_t count = state.range(0); | ||
|
|
||
| for (auto _ : state) { | ||
| std::vector<int> vec; | ||
| vec.reserve(count); | ||
| for (size_t i = 0; i < count; ++i) { | ||
| vec.push_back(static_cast<int>(i)); | ||
| } | ||
| benchmark::DoNotOptimize(vec); | ||
| benchmark::ClobberMemory(); | ||
| } | ||
| } | ||
| BENCHMARK(BM_Vector_Reserve)->Arg(10)->Arg(100)->Arg(1000)->Arg(10000); | ||
|
|
||
| // Benchmark: String concatenation (many allocations) | ||
| static void BM_String_Concatenation(benchmark::State& state) { | ||
| const size_t count = state.range(0); | ||
|
|
||
| for (auto _ : state) { | ||
| std::string result; | ||
| for (size_t i = 0; i < count; ++i) { | ||
| result += "x"; | ||
| } | ||
| benchmark::DoNotOptimize(result); | ||
| benchmark::ClobberMemory(); | ||
| } | ||
| } | ||
| BENCHMARK(BM_String_Concatenation)->Arg(10)->Arg(100)->Arg(1000)->Arg(10000); | ||
|
|
||
| // Benchmark: String concatenation with reserve (optimized) | ||
| static void BM_String_Reserve(benchmark::State& state) { | ||
| const size_t count = state.range(0); | ||
|
|
||
| for (auto _ : state) { | ||
| std::string result; | ||
| result.reserve(count); | ||
| for (size_t i = 0; i < count; ++i) { | ||
| result += "x"; | ||
| } | ||
| benchmark::DoNotOptimize(result); | ||
| benchmark::ClobberMemory(); | ||
| } | ||
| } | ||
| BENCHMARK(BM_String_Reserve)->Arg(10)->Arg(100)->Arg(1000)->Arg(10000); |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -186,15 +186,15 @@ State::State(std::string name, IterationCount max_iters, | |||||
| internal::ThreadTimer* timer, internal::ThreadManager* manager, | ||||||
| internal::PerfCountersMeasurement* perf_counters_measurement, | ||||||
| ProfilerManager* profiler_manager | ||||||
| #if defined(CODSPEED_SIMULATION) || defined(CODSPEED_WALLTIME) | ||||||
| #if defined(CODSPEED_ANALYSIS) || defined(CODSPEED_WALLTIME) | ||||||
| , | ||||||
| codspeed::CodSpeed* codspeed | ||||||
| #endif | ||||||
| ) | ||||||
| : total_iterations_(0), | ||||||
| batch_leftover_(0), | ||||||
| max_iterations(max_iters), | ||||||
| #if defined(CODSPEED_SIMULATION) || defined(CODSPEED_WALLTIME) | ||||||
| #if defined(CODSPEED_ANALYSIS) || defined(CODSPEED_WALLTIME) | ||||||
| codspeed_(codspeed), | ||||||
| #endif | ||||||
| #ifdef CODSPEED_WALLTIME | ||||||
|
|
@@ -462,7 +462,7 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks, | |||||
| #ifdef CODSPEED_ENABLED | ||||||
| auto& Err = display_reporter->GetErrorStream(); | ||||||
| // Determine the width of the name field using a minimum width of 10. | ||||||
| #ifdef CODSPEED_SIMULATION | ||||||
| #ifdef CODSPEED_ANALYSIS | ||||||
| Err << "Codspeed mode: simulation" << "\n"; | ||||||
|
||||||
| Err << "Codspeed mode: simulation" << "\n"; | |
| Err << "Codspeed mode: analysis" << "\n"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure that printing `analysis` is the best UX, since we agreed it should only be an internal build mode. We could instead forward the real measurement mode and print that, while still enabling CODSPEED_WALLTIME/CODSPEED_ANALYSIS. What do you think?
Uh oh!
There was an error while loading. Please reload this page.