Add a CodSpeed "memory" mode and group the analysis-style modes together.

  CI:      the "instrumentation" matrix entry becomes "simulation" + "memory",
           and the memtrack CLI is installed before the benchmarks run.
  Build:   Bazel and CMake accept mode "memory"; instrumentation, simulation
           and memory all define CODSPEED_ANALYSIS (was CODSPEED_SIMULATION),
           and every enabled mode defines CODSPEED_MODE_DISPLAY, which
           RunBenchmarks prints as the "Codspeed mode:" banner.
  Runtime: start/end_benchmark and measurement_start/stop are skipped while
           is_warmup_ is set; RunSimulation is renamed RunAnalysis.
  Example: new memory_bench.hpp with allocation-heavy benchmarks.

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 59813fb..bab5ac5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -54,7 +54,9 @@ jobs:
     strategy:
       matrix:
         include:
-          - codspeed-mode: "instrumentation"
+          - codspeed-mode: "simulation"
+            runner: "ubuntu-latest"
+          - codspeed-mode: "memory"
             runner: "ubuntu-latest"
           - codspeed-mode: "walltime"
             runner: "codspeed-macro"
@@ -82,6 +84,12 @@ jobs:
           cmake -DCODSPEED_MODE=${{ matrix.codspeed-mode }} -DCODSPEED_STRICT_WARNINGS=ON ..
           make -j
 
+      - uses: dtolnay/rust-toolchain@stable
+      - name: Install the latest memtrack CLI
+        run: |
+          sudo apt-get update && sudo apt-get install -y autopoint bison flex
+          cargo install --git https://github.com/CodSpeedHQ/runner --branch main memtrack
+
       - name: Run the benchmarks
         uses: CodSpeedHQ/action@main
         if: matrix.codspeed-mode != 'off'
@@ -94,7 +102,9 @@ jobs:
     strategy:
       matrix:
         include:
-          - codspeed-mode: "instrumentation"
+          - codspeed-mode: "simulation"
+            runner: "ubuntu-latest"
+          - codspeed-mode: "memory"
             runner: "ubuntu-latest"
           - codspeed-mode: "walltime"
             runner: "codspeed-macro"
@@ -120,6 +130,12 @@ jobs:
         run: |
           bazel build //examples/google_benchmark_bazel:my_benchmark --@codspeed_core//:codspeed_mode=${{ matrix.codspeed-mode }} --@codspeed_core//:strict_warnings=on
 
+      - uses: dtolnay/rust-toolchain@stable
+      - name: Install the latest memtrack CLI
+        run: |
+          sudo apt-get update && sudo apt-get install -y autopoint bison flex
+          cargo install --git https://github.com/CodSpeedHQ/runner --branch main memtrack
+
       - name: Run the benchmarks
         uses: CodSpeedHQ/action@main
         if: matrix.codspeed-mode != 'off'
diff --git a/core/BUILD b/core/BUILD
index 25be1ad..3d366b3 100644
--- a/core/BUILD
+++ b/core/BUILD
@@ -81,9 +81,10 @@ cc_library(
     defines = [
         "CODSPEED_VERSION=\\\"{}\\\"".format(CODSPEED_VERSION),
     ] + select({
-        ":instrumentation_mode": ["CODSPEED_ENABLED", "CODSPEED_SIMULATION"],
-        ":simulation_mode": ["CODSPEED_ENABLED", "CODSPEED_SIMULATION"],
-        ":walltime_mode": ["CODSPEED_ENABLED", "CODSPEED_WALLTIME"],
+        ":instrumentation_mode": ["CODSPEED_ENABLED", "CODSPEED_ANALYSIS", "CODSPEED_MODE_DISPLAY=\\\"instrumentation\\\""],
+        ":simulation_mode": ["CODSPEED_ENABLED", "CODSPEED_ANALYSIS", "CODSPEED_MODE_DISPLAY=\\\"simulation\\\""],
+        ":memory_mode": ["CODSPEED_ENABLED", "CODSPEED_ANALYSIS", "CODSPEED_MODE_DISPLAY=\\\"memory\\\""],
+        ":walltime_mode": ["CODSPEED_ENABLED", "CODSPEED_WALLTIME", "CODSPEED_MODE_DISPLAY=\\\"walltime\\\""],
         "//conditions:default": [],
     }),
     deps = [":instrument_hooks"],
@@ -98,6 +99,7 @@ string_flag(
         "off",
         "instrumentation",
         "simulation",
+        "memory",
         "walltime",
     ],
     visibility = ["//visibility:public"],
@@ -113,6 +115,11 @@ config_setting(
     flag_values = {":codspeed_mode": "simulation"},
 )
 
+config_setting(
+    name = "memory_mode",
+    flag_values = {":codspeed_mode": "memory"},
+)
+
 config_setting(
     name = "walltime_mode",
     flag_values = {":codspeed_mode": "walltime"},
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index 555d563..a359dc1 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -143,7 +143,7 @@ target_compile_definitions(
 
 message(STATUS "Using codspeed root directory: ${CODSPEED_ROOT_DIR}")
 
-set(CODSPEED_MODE_ALLOWED_VALUES "off" "instrumentation" "simulation" "walltime")
+set(CODSPEED_MODE_ALLOWED_VALUES "off" "instrumentation" "simulation" "memory" "walltime")
 set(CODSPEED_MODE "off" CACHE STRING "Build mode for Codspeed")
 set_property(
     CACHE CODSPEED_MODE
@@ -152,6 +152,7 @@ set_property(
 
 if(NOT CODSPEED_MODE STREQUAL "off")
     target_compile_definitions(codspeed PUBLIC -DCODSPEED_ENABLED)
+    target_compile_definitions(codspeed PUBLIC -DCODSPEED_MODE_DISPLAY="${CODSPEED_MODE}")
 
     if(NOT CMAKE_BUILD_TYPE)
         message(
@@ -166,10 +167,10 @@ if(NOT CODSPEED_MODE STREQUAL "off")
     endif()
 
     # Define a preprocessor macro based on the build mode
-    if(CODSPEED_MODE STREQUAL "instrumentation" OR CODSPEED_MODE STREQUAL "simulation")
+    if(CODSPEED_MODE STREQUAL "instrumentation" OR CODSPEED_MODE STREQUAL "simulation" OR CODSPEED_MODE STREQUAL "memory")
         target_compile_definitions(
             codspeed
-            PUBLIC -DCODSPEED_SIMULATION
+            PUBLIC -DCODSPEED_ANALYSIS
         )
     elseif(CODSPEED_MODE STREQUAL "walltime")
         target_compile_definitions(codspeed PUBLIC -DCODSPEED_WALLTIME)
diff --git a/core/instrument-hooks b/core/instrument-hooks
index b260b17..1460da1 160000
--- a/core/instrument-hooks
+++ b/core/instrument-hooks
@@ -1 +1 @@
-Subproject commit b260b17f1eb5a2e292c112c9d399c9cd5b42c65c
+Subproject commit 1460da12ab87b17ec021706c7f084210269ed024
diff --git a/examples/google_benchmark_bazel/memory_bench.hpp b/examples/google_benchmark_bazel/memory_bench.hpp
new file mode 120000
index 0000000..0e52303
--- /dev/null
+++ b/examples/google_benchmark_bazel/memory_bench.hpp
@@ -0,0 +1 @@
+../google_benchmark_cmake/memory_bench.hpp
\ No newline at end of file
diff --git a/examples/google_benchmark_cmake/main.cpp b/examples/google_benchmark_cmake/main.cpp
index 0ebd56c..890b5ab 100644
--- a/examples/google_benchmark_cmake/main.cpp
+++ b/examples/google_benchmark_cmake/main.cpp
@@ -4,6 +4,7 @@
 
 #include "fibonacci_bench.hpp"
 #include "fixture_bench.hpp"
+#include "memory_bench.hpp"
 #include "multithread_bench.hpp"
 #include "pause_timing_bench.hpp"
 #include "sleep_bench.hpp"
diff --git a/examples/google_benchmark_cmake/memory_bench.hpp b/examples/google_benchmark_cmake/memory_bench.hpp
new file mode 100644
index 0000000..80abd3a
--- /dev/null
+++ b/examples/google_benchmark_cmake/memory_bench.hpp
@@ -0,0 +1,201 @@
+#pragma once
+
+#include <benchmark/benchmark.h>
+
+#include <algorithm>
+#include <cctype>
+#include <string>
+#include <vector>
+
+// Run-length encoding: compress consecutive repeated characters
+// Example: "aaabbbccc" -> "3a3b3c"
+// NOTE: Intentionally inefficient - no pre-allocation to show multiple
+// allocations
+static std::string rle_encode(const std::string& input) {
+  if (input.empty()) return "";
+
+  std::string result;  // No reserve - will trigger multiple reallocations
+
+  char current = input[0];
+  size_t count = 1;
+
+  for (size_t i = 1; i < input.size(); ++i) {
+    if (input[i] == current) {
+      count++;
+    } else {
+      // Create intermediate strings for each run
+      std::string count_str = std::to_string(count);
+      std::string run_encoded = count_str + current;
+      result += run_encoded;  // Concatenation causes reallocations
+      current = input[i];
+      count = 1;
+    }
+  }
+
+  // Final run
+  std::string count_str = std::to_string(count);
+  std::string final_run = count_str + current;
+  result += final_run;
+
+  return result;
+}
+
+// Run-length decoding: decompress RLE encoded string
+// Example: "3a3b3c" -> "aaabbbccc"
+static std::string rle_decode(const std::string& input) {
+  std::string result;
+  size_t i = 0;
+
+  while (i < input.size()) {
+    // Parse the count. std::isdigit is undefined for negative char values,
+    // so the byte goes through unsigned char first.
+    size_t count = 0;
+    while (i < input.size() &&
+           std::isdigit(static_cast<unsigned char>(input[i]))) {
+      count = count * 10 + (input[i] - '0');
+      i++;
+    }
+
+    // Get the character
+    if (i < input.size()) {
+      char ch = input[i];
+      result.append(count, ch);
+      i++;
+    }
+  }
+
+  return result;
+}
+
+// Generate a string with patterns for RLE
+static std::string generate_rle_input(size_t size, size_t run_length) {
+  std::string result;
+  result.reserve(size);
+
+  const std::string chars = "abcdefghijklmnopqrstuvwxyz";
+  size_t char_idx = 0;
+
+  while (result.size() < size) {
+    size_t count = std::min(run_length, size - result.size());
+    result.append(count, chars[char_idx % chars.size()]);
+    char_idx++;
+  }
+
+  return result;
+}
+
+// Benchmark: RLE encoding with small runs (high compression)
+static void BM_RLE_Encode_SmallRuns(benchmark::State& state) {
+  const size_t input_size = state.range(0);
+  std::string input = generate_rle_input(input_size, 3);
+
+  for (auto _ : state) {
+    std::string encoded = rle_encode(input);
+    benchmark::DoNotOptimize(encoded);
+    benchmark::ClobberMemory();
+  }
+
+  state.SetBytesProcessed(state.iterations() * input_size);
+}
+BENCHMARK(BM_RLE_Encode_SmallRuns)
+    ->Arg(100)
+    ->Arg(1000)
+    ->Arg(10000)
+    ->Arg(100000);
+
+// Benchmark: RLE encoding with large runs (low compression)
+static void BM_RLE_Encode_LargeRuns(benchmark::State& state) {
+  const size_t input_size = state.range(0);
+  std::string input = generate_rle_input(input_size, 100);
+
+  for (auto _ : state) {
+    std::string encoded = rle_encode(input);
+    benchmark::DoNotOptimize(encoded);
+    benchmark::ClobberMemory();
+  }
+
+  state.SetBytesProcessed(state.iterations() * input_size);
+}
+BENCHMARK(BM_RLE_Encode_LargeRuns)
+    ->Arg(100)
+    ->Arg(1000)
+    ->Arg(10000)
+    ->Arg(100000);
+
+// Benchmark: RLE decoding
+static void BM_RLE_Decode(benchmark::State& state) {
+  const size_t input_size = state.range(0);
+  std::string input = generate_rle_input(input_size, 10);
+  std::string encoded = rle_encode(input);
+
+  for (auto _ : state) {
+    std::string decoded = rle_decode(encoded);
+    benchmark::DoNotOptimize(decoded);
+    benchmark::ClobberMemory();
+  }
+
+  state.SetBytesProcessed(state.iterations() * encoded.size());
+}
+BENCHMARK(BM_RLE_Decode)->Arg(100)->Arg(1000)->Arg(10000)->Arg(100000);
+
+// Benchmark: Vector allocations (resizing pattern)
+static void BM_Vector_PushBack(benchmark::State& state) {
+  const size_t count = state.range(0);
+
+  for (auto _ : state) {
+    std::vector<int> vec;
+    for (size_t i = 0; i < count; ++i) {
+      vec.push_back(static_cast<int>(i));
+    }
+    benchmark::DoNotOptimize(vec);
+    benchmark::ClobberMemory();
+  }
+}
+BENCHMARK(BM_Vector_PushBack)->Arg(10)->Arg(100)->Arg(1000)->Arg(10000);
+
+// Benchmark: Vector allocations with reserve (optimized)
+static void BM_Vector_Reserve(benchmark::State& state) {
+  const size_t count = state.range(0);
+
+  for (auto _ : state) {
+    std::vector<int> vec;
+    vec.reserve(count);
+    for (size_t i = 0; i < count; ++i) {
+      vec.push_back(static_cast<int>(i));
+    }
+    benchmark::DoNotOptimize(vec);
+    benchmark::ClobberMemory();
+  }
+}
+BENCHMARK(BM_Vector_Reserve)->Arg(10)->Arg(100)->Arg(1000)->Arg(10000);
+
+// Benchmark: String concatenation (many allocations)
+static void BM_String_Concatenation(benchmark::State& state) {
+  const size_t count = state.range(0);
+
+  for (auto _ : state) {
+    std::string result;
+    for (size_t i = 0; i < count; ++i) {
+      result += "x";
+    }
+    benchmark::DoNotOptimize(result);
+    benchmark::ClobberMemory();
+  }
+}
+BENCHMARK(BM_String_Concatenation)->Arg(10)->Arg(100)->Arg(1000)->Arg(10000);
+
+// Benchmark: String concatenation with reserve (optimized)
+static void BM_String_Reserve(benchmark::State& state) {
+  const size_t count = state.range(0);
+
+  for (auto _ : state) {
+    std::string result;
+    result.reserve(count);
+    for (size_t i = 0; i < count; ++i) {
+      result += "x";
+    }
+    benchmark::DoNotOptimize(result);
+    benchmark::ClobberMemory();
+  }
+}
+BENCHMARK(BM_String_Reserve)->Arg(10)->Arg(100)->Arg(1000)->Arg(10000);
diff --git a/google_benchmark/include/benchmark/benchmark.h b/google_benchmark/include/benchmark/benchmark.h
index b7cc9c0..9aa4dc2 100644
--- a/google_benchmark/include/benchmark/benchmark.h
+++ b/google_benchmark/include/benchmark/benchmark.h
@@ -946,7 +946,7 @@ class BENCHMARK_EXPORT BENCHMARK_INTERNAL_CACHELINE_ALIGNED State {
 
  public:
   const IterationCount max_iterations;
-#if defined(CODSPEED_SIMULATION) || defined(CODSPEED_WALLTIME)
+#if defined(CODSPEED_ANALYSIS) || defined(CODSPEED_WALLTIME)
   codspeed::CodSpeed* codspeed_;
 #endif
 #ifdef CODSPEED_WALLTIME
@@ -974,7 +974,7 @@ class BENCHMARK_EXPORT BENCHMARK_INTERNAL_CACHELINE_ALIGNED State {
         internal::ThreadTimer* timer, internal::ThreadManager* manager,
         internal::PerfCountersMeasurement* perf_counters_measurement,
         ProfilerManager* profiler_manager
-#if defined(CODSPEED_SIMULATION) || defined(CODSPEED_WALLTIME)
+#if defined(CODSPEED_ANALYSIS) || defined(CODSPEED_WALLTIME)
         ,
         codspeed::CodSpeed* codspeed = NULL
 #endif
@@ -1074,13 +1074,15 @@ struct State::StateIterator {
     if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) {
       return true;
     }
-#ifdef CODSPEED_SIMULATION
+#ifdef CODSPEED_ANALYSIS
+    if (!parent_->is_warmup_) {
       measurement_stop();
+    }
 #endif
     parent_->FinishKeepRunning();
 
-#ifdef CODSPEED_SIMULATION
-    if (parent_->codspeed_ != NULL) {
+#ifdef CODSPEED_ANALYSIS
+    if (parent_->codspeed_ != NULL && !parent_->is_warmup_) {
       parent_->codspeed_->end_benchmark();
     }
 #endif
@@ -1096,15 +1098,17 @@ inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
   return StateIterator(this);
 }
 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
-#ifdef CODSPEED_SIMULATION
-  if (this->codspeed_ != NULL) {
+#ifdef CODSPEED_ANALYSIS
+  if (this->codspeed_ != NULL && !is_warmup_) {
     this->codspeed_->start_benchmark(name_);
   }
 #endif
 
   StartKeepRunning();
-#ifdef CODSPEED_SIMULATION
-  measurement_start();
+#ifdef CODSPEED_ANALYSIS
+  if (!is_warmup_) {
+    measurement_start();
+  }
 #endif
   return StateIterator();
 }
diff --git a/google_benchmark/src/benchmark.cc b/google_benchmark/src/benchmark.cc
index 179599e..0358c9a 100644
--- a/google_benchmark/src/benchmark.cc
+++ b/google_benchmark/src/benchmark.cc
@@ -186,7 +186,7 @@ State::State(std::string name, IterationCount max_iters,
              internal::ThreadTimer* timer, internal::ThreadManager* manager,
              internal::PerfCountersMeasurement* perf_counters_measurement,
              ProfilerManager* profiler_manager
-#if defined(CODSPEED_SIMULATION) || defined(CODSPEED_WALLTIME)
+#if defined(CODSPEED_ANALYSIS) || defined(CODSPEED_WALLTIME)
              ,
              codspeed::CodSpeed* codspeed
 #endif
@@ -196,7 +196,7 @@ State::State(std::string name, IterationCount max_iters,
     : total_iterations_(0),
       batch_leftover_(0),
       max_iterations(max_iters),
-#if defined(CODSPEED_SIMULATION) || defined(CODSPEED_WALLTIME)
+#if defined(CODSPEED_ANALYSIS) || defined(CODSPEED_WALLTIME)
       codspeed_(codspeed),
 #endif
 #ifdef CODSPEED_WALLTIME
@@ -464,12 +464,7 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
 
 #ifdef CODSPEED_ENABLED
   auto& Err = display_reporter->GetErrorStream();
-  // Determine the width of the name field using a minimum width of 10.
-#ifdef CODSPEED_SIMULATION
-  Err << "Codspeed mode: simulation" << "\n";
-#elif defined(CODSPEED_WALLTIME)
-  Err << "Codspeed mode: walltime" << "\n";
-#endif
+  Err << "Codspeed mode: " << CODSPEED_MODE_DISPLAY << "\n";
 #endif  // CODSPEED_ENABLED
 
   bool might_have_aggregates = FLAGS_benchmark_repetitions > 1;
diff --git a/google_benchmark/src/benchmark_api_internal.cc b/google_benchmark/src/benchmark_api_internal.cc
index 4fa5612..4c85b7a 100644
--- a/google_benchmark/src/benchmark_api_internal.cc
+++ b/google_benchmark/src/benchmark_api_internal.cc
@@ -91,8 +91,8 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
   teardown_ = benchmark_.teardown_;
 }
 
-#ifdef CODSPEED_SIMULATION
-State BenchmarkInstance::RunSimulation(
+#ifdef CODSPEED_ANALYSIS
+State BenchmarkInstance::RunAnalysis(
     codspeed::CodSpeed* codspeed, internal::ThreadTimer* timer,
     internal::ThreadManager* manager,
     internal::PerfCountersMeasurement* perf_counters_measurement,
@@ -103,7 +103,7 @@ State BenchmarkInstance::RunSimulation(
   internal::ThreadTimer warmup_timer = internal::ThreadTimer::Create();
   State warmup_state(name_.function_name, 1, args_, 0, 1, &warmup_timer,
                      manager, perf_counters_measurement, profiler_manager,
-                     NULL);
+                     NULL, /*is_warmup=*/true);
   benchmark_.Run(warmup_state);
 
   State st(name().str(), 1, args_, 0, 1, timer, manager,
@@ -119,7 +119,7 @@ State BenchmarkInstance::__codspeed_root_frame__Run(
     internal::ThreadManager* manager,
     internal::PerfCountersMeasurement* perf_counters_measurement,
     ProfilerManager* profiler_manager, bool is_warmup) const {
-#if defined(CODSPEED_SIMULATION) || defined(CODSPEED_WALLTIME)
+#if defined(CODSPEED_ANALYSIS) || defined(CODSPEED_WALLTIME)
   State st(name_.function_name, iters, args_, thread_id, threads_, timer,
            manager, perf_counters_measurement, profiler_manager, codspeed::CodSpeed::getInstance(), is_warmup);
 #else
diff --git a/google_benchmark/src/benchmark_api_internal.h b/google_benchmark/src/benchmark_api_internal.h
index 3abce1e..c40f288 100644
--- a/google_benchmark/src/benchmark_api_internal.h
+++ b/google_benchmark/src/benchmark_api_internal.h
@@ -9,7 +9,7 @@
 #include <vector>
 
 #include "benchmark/benchmark.h"
-#ifdef CODSPEED_SIMULATION
+#ifdef CODSPEED_ANALYSIS
 #include "codspeed.h"
 #endif
 #include "commandlineflags.h"
@@ -38,7 +38,7 @@ class BenchmarkInstance {
   BigOFunc* complexity_lambda() const { return complexity_lambda_; }
   const std::vector<Statistics>& statistics() const { return statistics_; }
   int repetitions() const {
-#ifdef CODSPEED_SIMULATION
+#ifdef CODSPEED_ANALYSIS
     (void)repetitions_;
     return 1;
 #else
@@ -57,8 +57,8 @@ class BenchmarkInstance {
       internal::PerfCountersMeasurement* perf_counters_measurement,
       ProfilerManager* profiler_manager, bool is_warmup = false) const;
 
-#ifdef CODSPEED_SIMULATION
-  State RunSimulation(
+#ifdef CODSPEED_ANALYSIS
+  State RunAnalysis(
       codspeed::CodSpeed* codspeed, internal::ThreadTimer* timer,
       internal::ThreadManager* manager,
       internal::PerfCountersMeasurement* perf_counters_measurement,
diff --git a/google_benchmark/src/benchmark_runner.cc b/google_benchmark/src/benchmark_runner.cc
index e31ec8a..2d31cca 100644
--- a/google_benchmark/src/benchmark_runner.cc
+++ b/google_benchmark/src/benchmark_runner.cc
@@ -461,13 +461,13 @@ void BenchmarkRunner::RunProfilerManager(IterationCount profile_iterations) {
 }
 
 void BenchmarkRunner::DoOneRepetition() {
-#ifdef CODSPEED_SIMULATION
+#ifdef CODSPEED_ANALYSIS
   std::unique_ptr<internal::ThreadManager> manager;
   manager.reset(new internal::ThreadManager(b.threads()));
   internal::ThreadTimer timer = internal::ThreadTimer::Create();
   b.Setup();
-  State st = b.RunSimulation(codspeed::CodSpeed::getInstance(), &timer,
-                             manager.get(), nullptr, nullptr);
+  State st = b.RunAnalysis(codspeed::CodSpeed::getInstance(), &timer,
+                           manager.get(), nullptr, nullptr);
   b.Teardown();
 
   return;