diff --git a/include/seastar/core/internal/cpu_profiler.hh b/include/seastar/core/internal/cpu_profiler.hh index 67047c3d995..d7a58af4b74 100644 --- a/include/seastar/core/internal/cpu_profiler.hh +++ b/include/seastar/core/internal/cpu_profiler.hh @@ -21,16 +21,17 @@ #pragma once +#include #include #include -#include +#include #include + #include #include #include #include -#include #include #include @@ -42,6 +43,9 @@ struct cpu_profiler_trace { using kernel_trace_vec = boost::container::static_vector; simple_backtrace user_backtrace; kernel_trace_vec kernel_backtrace; + // The scheduling group active at the time the same was taken. Note that + // non-task reactor work (such as polling) + scheduling_group sg; }; constexpr size_t max_number_of_traces = 128; @@ -72,7 +76,7 @@ private: }; // Temporarily enable/disable the CPU profiler from taking stacktraces on this thread, -// but don't disable the profiler completely. This can be used disable the profiler +// but don't disable the profiler completely. This can be used disable the profiler // for cases when taking a backtrace isn't valid (IE JIT generated code). void profiler_drop_stacktraces(bool) noexcept; @@ -108,7 +112,7 @@ struct cpu_profiler_stats { } unsigned sum_dropped() const { - return dropped_samples_from_manual_disablement + return dropped_samples_from_manual_disablement + dropped_samples_from_buffer_full + dropped_samples_from_exceptions + dropped_samples_from_mutex_contention; @@ -155,7 +159,7 @@ public: virtual void arm_timer(std::chrono::nanoseconds) = 0; virtual void disarm_timer() = 0; virtual bool is_spurious_signal() { return false; } - virtual std::optional + virtual std::optional try_get_kernel_backtrace() { return std::nullopt; } }; @@ -163,7 +167,7 @@ class cpu_profiler_posix_timer : public cpu_profiler { posix_timer _timer; public: cpu_profiler_posix_timer(cpu_profiler_config cfg) - : cpu_profiler(cfg) + : cpu_profiler(cfg) // CLOCK_MONOTONIC is used here in place of CLOCK_THREAD_CPUTIME_ID. // This is since for intervals of ~5ms or less CLOCK_THREAD_CPUTIME_ID // fires 200-600% after it's configured time. Therefore it is not granular @@ -172,7 +176,7 @@ public: // this issue. , _timer({signal_number()}, CLOCK_MONOTONIC) {} - virtual ~cpu_profiler_posix_timer() override = default; + virtual ~cpu_profiler_posix_timer() override = default; virtual void arm_timer(std::chrono::nanoseconds) override; virtual void disarm_timer() override; }; @@ -189,7 +193,7 @@ public: virtual void arm_timer(std::chrono::nanoseconds) override; virtual void disarm_timer() override; virtual bool is_spurious_signal() override; - virtual std::optional + virtual std::optional try_get_kernel_backtrace() override; }; diff --git a/src/core/cpu_profiler.cc b/src/core/cpu_profiler.cc index d020dc5dc9b..612f420a169 100644 --- a/src/core/cpu_profiler.cc +++ b/src/core/cpu_profiler.cc @@ -19,12 +19,13 @@ * Copyright (C) 2023 ScyllaDB */ -#include -#include -#include + #include + #include + #include -#include -#include + #include + #include + #include namespace seastar { seastar::logger cpu_profiler_logger("cpu_profiler"); @@ -38,7 +39,7 @@ signal_mutex::guard::~guard() { return; } // Ensure the subsequent store isn't hoisted by the the - // compiler into the critical section it's intended to + // compiler into the critical section it's intended to // protect. std::atomic_signal_fence(std::memory_order_release); _mutex->_mutex.store(false, std::memory_order_relaxed); @@ -52,7 +53,7 @@ std::optional signal_mutex::try_lock() { // by the compiler. std::atomic_signal_fence(std::memory_order_acq_rel); return {guard(this)}; - } + } return std::nullopt; } @@ -100,7 +101,7 @@ void cpu_profiler::update_config(cpu_profiler_config cfg) { auto is_stopped = _is_stopped; stop(); _cfg = cfg; - // Don't start the profiler if it's been explicitly + // Don't start the profiler if it's been explicitly // stopped elsewhere. if (!is_stopped) { start(); @@ -133,7 +134,7 @@ void cpu_profiler::on_signal() { // During exception handling in libgcc there is a critical section // where the stack is being modified so execution can be returned to - // a handler for the exception. This modification isn't capture by + // a handler for the exception. This modification isn't capture by // the eh_frames for the program though. So when libgcc's backtrace // enters the partially modified stack it will follow invalid addresses // and cause a segfault. To avoid this we check if any exception @@ -160,6 +161,7 @@ void cpu_profiler::on_signal() { } _traces.emplace_back(); _traces.back().user_backtrace = current_backtrace_tasklocal(); + _traces.back().sg = current_scheduling_group(); auto kernel_bt = try_get_kernel_backtrace(); if (kernel_bt) { @@ -178,7 +180,7 @@ void cpu_profiler::on_signal() { auto next = get_next_timeout(); arm_timer(next); } - + size_t cpu_profiler::results(std::vector& results_buffer) { // Since is this not called in the interrupt it should always succeed // in acquiring the lock. @@ -209,7 +211,7 @@ bool cpu_profiler_linux_perf_event::is_spurious_signal() { return _perf_event.is_spurious_signal(); } -std::optional +std::optional cpu_profiler_linux_perf_event::try_get_kernel_backtrace() { return _perf_event.try_get_kernel_backtrace(); } diff --git a/tests/unit/cpu_profiler_test.cc b/tests/unit/cpu_profiler_test.cc index 4206961b2bb..03a8ed0a201 100644 --- a/tests/unit/cpu_profiler_test.cc +++ b/tests/unit/cpu_profiler_test.cc @@ -19,22 +19,30 @@ * Copyright (C) 2023 ScyllaDB Ltd. */ -#include +#include +#include #include + #include #include +#include #include +#include #include -#include -#include +#include #include #include -#include -#include -#include +#include +#include #include "stall_detector_test_utilities.hh" +#include + +#include + +namespace { + struct temporary_profiler_settings { std::chrono::nanoseconds prev_ns; bool prev_enabled; @@ -64,9 +72,37 @@ bool close_to_expected(size_t actual_size, size_t expected_size, double allowed_ auto lower_bound = (1 - allowed_dev) * expected_size; auto upper_bound = (1 + allowed_dev) * expected_size; + BOOST_TEST_INFO("actual_size: " << actual_size << ", lower_bound " << lower_bound << ", upper_bound " << upper_bound); + return actual_size <= upper_bound && actual_size >= lower_bound; } +/* + * Get the current profile results and dropped count. If sg_in_main is true, also validates that + * the sg associated with the profile is always main, as we expect unless some SG have been + * created explicitly. + */ +std::pair, size_t> get_profile_and_dropped(bool sg_is_main = true) { + std::vector results; + auto dropped = engine().profiler_results(results); + + for (auto& result: results) { + BOOST_CHECK(result.sg == default_scheduling_group()); + } + + return {results, dropped}; +} + + +// get profile and validate results +std::vector get_profile() { + return get_profile_and_dropped().first; +} + +} + + + SEASTAR_THREAD_TEST_CASE(config_case) { // Ensure that repeatedly configuring the profiler results // in expected behavior. @@ -79,14 +115,12 @@ SEASTAR_THREAD_TEST_CASE(config_case) { spin_some_cooperatively(120*10ms); - std::vector results; - engine().profiler_results(results); + auto results = get_profile(); BOOST_REQUIRE(close_to_expected(results.size(), 12)); } - + spin_some_cooperatively(128*10ms); - std::vector results; - engine().profiler_results(results); + auto results = get_profile(); BOOST_REQUIRE_EQUAL(results.size(), 0); } @@ -95,21 +129,19 @@ SEASTAR_THREAD_TEST_CASE(simple_case) { spin_some_cooperatively(120*10ms); - std::vector results; - auto dropped_samples = engine().profiler_results(results); + auto [results, dropped_samples] = get_profile_and_dropped(); BOOST_REQUIRE(close_to_expected(results.size(), 12)); BOOST_REQUIRE_EQUAL(dropped_samples, 0); } SEASTAR_THREAD_TEST_CASE(overwrite_case) { - // Ensure that older samples are being overridden in + // Ensure that older samples are being overridden in // the cases where we can't collect results fast enough. temporary_profiler_settings cp{true, 10ms}; spin_some_cooperatively(256*10ms); - std::vector results; - auto dropped_samples = engine().profiler_results(results); + auto [results, dropped_samples] = get_profile_and_dropped(); // 128 is the maximum number of samples the profiler can // retain. BOOST_REQUIRE_EQUAL(results.size(), 128); @@ -130,8 +162,7 @@ SEASTAR_THREAD_TEST_CASE(mixed_case) { } BOOST_REQUIRE_EQUAL(reports, 5); - std::vector results; - engine().profiler_results(results); + auto results = get_profile(); BOOST_REQUIRE(close_to_expected(results.size(), 12)); } @@ -141,8 +172,7 @@ SEASTAR_THREAD_TEST_CASE(spin_in_kernel) { spin_some_cooperatively(100ms, [] { mmap_populate(128 * 1024); }); - std::vector results; - engine().profiler_results(results); + auto results = get_profile(); int count = 0; for(auto& result : results) { if(result.kernel_backtrace.size() > 0){ @@ -158,7 +188,7 @@ SEASTAR_THREAD_TEST_CASE(spin_in_kernel) { BOOST_REQUIRE(results.size() > 0); } -SEASTAR_THREAD_TEST_CASE(signal_mutex_basic) { +SEASTAR_THREAD_TEST_CASE(signal_mutex_basic) { // A very basic test that ensures the signal_mutex // can't be re-locked after it's already been acquired. internal::signal_mutex mutex; @@ -222,8 +252,7 @@ SEASTAR_THREAD_TEST_CASE(exception_handler_case) { random_exception_catcher(100, d(gen)); } - std::vector results; - auto dropped_samples = engine().profiler_results(results); + auto [results, dropped_samples] = get_profile_and_dropped(); BOOST_REQUIRE_EQUAL(results.size(), 128); BOOST_REQUIRE(dropped_samples > 0); } @@ -235,8 +264,7 @@ SEASTAR_THREAD_TEST_CASE(manually_disable) { spin_some_cooperatively(100ms); - std::vector results; - auto dropped_samples = engine().profiler_results(results); + auto [_, dropped_samples] = get_profile_and_dropped(); BOOST_REQUIRE(dropped_samples > 0); } @@ -255,7 +283,59 @@ SEASTAR_THREAD_TEST_CASE(config_thrashing) { spin_some_cooperatively(1us); } - std::vector results; - engine().profiler_results(results); + auto results = get_profile(); BOOST_REQUIRE(results.size() > 0); } + +SEASTAR_THREAD_TEST_CASE(scheduling_group_test) { + + [[maybe_unused]] auto sg_a = create_scheduling_group("sg_a", 200).get(); + [[maybe_unused]] auto sg_b = create_scheduling_group("sg_b", 200).get(); + + auto destoy_groups = defer([&]() noexcept { + destroy_scheduling_group(sg_b).get(); + destroy_scheduling_group(sg_a).get(); + }); + + temporary_profiler_settings cp{true, 100ms}; + + auto fut_a = with_scheduling_group(sg_a, [] { + return spin_some_cooperatively_coro(2100ms); + }); + + with_scheduling_group(sg_b, [] { + return spin_some_cooperatively_coro(2100ms); + }).get(); + + std::move(fut_a).get(); + + std::vector results; + auto dropped_samples = engine().profiler_results(results); + + size_t count_a = 0, count_b = 0, count_main = 0; + for (auto& r : results) { + if (r.sg == sg_a) { + ++count_a; + } else if (r.sg == sg_b) { + ++count_b; + } else if (r.sg == default_scheduling_group()) { + // this happens when the profiler triggers during non-task + // work, such as in the reactor pollers + ++count_main; + } else { + BOOST_TEST_FAIL("unexpected SG: " << r.sg.name()); + } + } + + // We expect a and b to be a 1:1 ratio, though we accept large + // variance since this is random sampling of two "randomly" scheduled + // groups so we don't really have the same guarantees we do in the + // single group case where we expect sort of +/- 1 due to the way we + // calculate the sampling intervals. + // Nominally the split is 10/10/0 for a/b/main, but we accept the + // below to keep flakiness to a minimum. + BOOST_CHECK_GT(count_a, 5); + BOOST_CHECK_GT(count_b, 5); + BOOST_CHECK_LT(count_main, 3); + BOOST_CHECK_EQUAL(dropped_samples, 0); +} diff --git a/tests/unit/stall_detector_test_utilities.hh b/tests/unit/stall_detector_test_utilities.hh index d42eff14805..d7e0cbf03e6 100644 --- a/tests/unit/stall_detector_test_utilities.hh +++ b/tests/unit/stall_detector_test_utilities.hh @@ -24,6 +24,9 @@ #include #include #include +#include "seastar/core/scheduling.hh" +#include "seastar/core/thread.hh" +#include "seastar/coroutine/maybe_yield.hh" #include #include #include @@ -88,6 +91,15 @@ void spin_some_cooperatively(std::chrono::duration how_much, void_fn bod } } +future<> spin_some_cooperatively_coro(std::chrono::duration how_much, void_fn body = []{}) { + auto end = std::chrono::steady_clock::now() + how_much; + while (std::chrono::steady_clock::now() < end) { + // fmt::print("GC: {}\n", current_scheduling_group().name()); + spin(200us, body); + co_await coroutine::maybe_yield(); + } +} + // Triggers stalls by spinning with a specify "body" function // which takes most of the spin time.