Add scheduler group to cpu profiler #169

Merged
merged 3 commits on Feb 20, 2025
20 changes: 12 additions & 8 deletions include/seastar/core/internal/cpu_profiler.hh
@@ -21,16 +21,17 @@

#pragma once

#include <seastar/util/backtrace.hh>
#include <seastar/core/circular_buffer_fixed_capacity.hh>
#include <seastar/core/internal/timers.hh>
#include <seastar/util/backtrace.hh>
#include <seastar/core/scheduling.hh>

#include <boost/container/static_vector.hpp>

#include <chrono>
#include <csignal>
#include <cstdint>
#include <ctime>
#include <mutex>
#include <atomic>
#include <optional>

@@ -42,6 +43,9 @@ struct cpu_profiler_trace {
using kernel_trace_vec = boost::container::static_vector<uintptr_t, 64>;
simple_backtrace user_backtrace;
kernel_trace_vec kernel_backtrace;
// The scheduling group active at the time the sample was taken. Note that
// non-task reactor work (such as polling)
Member
sentence incomplete

Member Author
thanks will fix

scheduling_group sg;
};

constexpr size_t max_number_of_traces = 128;
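
Not part of this diff, but for orientation: a minimal sketch of how a caller might consume the new sg field when draining the profiler. It assumes the same context as the unit tests further down (a Seastar reactor thread, engine().profiler_results() to fetch traces, <algorithm> available); samples_in_group is an illustrative name, not an existing helper.

// Sketch only: count how many profiler samples were attributed to one scheduling group.
size_t samples_in_group(scheduling_group sg) {
    std::vector<cpu_profiler_trace> traces;
    engine().profiler_results(traces);   // drain the per-shard trace buffer
    return std::count_if(traces.begin(), traces.end(),
                         [sg] (const cpu_profiler_trace& t) { return t.sg == sg; });
}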
@@ -72,7 +76,7 @@
};

// Temporarily enable/disable the CPU profiler from taking stacktraces on this thread,
// but don't disable the profiler completely. This can be used to disable the profiler
// for cases when taking a backtrace isn't valid (i.e. JIT-generated code).
void profiler_drop_stacktraces(bool) noexcept;
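
A hedged usage sketch for the declaration above (not part of this diff); run_jit_code() is a hypothetical placeholder for code whose frames the unwinder cannot walk:

profiler_drop_stacktraces(true);   // stop taking stacktraces on this thread
run_jit_code();                    // hypothetical region where unwinding would be unsafe
profiler_drop_stacktraces(false);  // resume; samples skipped in between presumably show up
                                   // via dropped_samples_from_manual_disablement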

@@ -108,7 +112,7 @@ struct cpu_profiler_stats {
}

unsigned sum_dropped() const {
return dropped_samples_from_manual_disablement
+ dropped_samples_from_buffer_full
+ dropped_samples_from_exceptions
+ dropped_samples_from_mutex_contention;
@@ -155,15 +159,15 @@ public:
virtual void arm_timer(std::chrono::nanoseconds) = 0;
virtual void disarm_timer() = 0;
virtual bool is_spurious_signal() { return false; }
virtual std::optional<linux_perf_event::kernel_backtrace>
try_get_kernel_backtrace() { return std::nullopt; }
};

class cpu_profiler_posix_timer : public cpu_profiler {
posix_timer _timer;
public:
cpu_profiler_posix_timer(cpu_profiler_config cfg)
: cpu_profiler(cfg)
// CLOCK_MONOTONIC is used here in place of CLOCK_THREAD_CPUTIME_ID.
// This is because for intervals of ~5ms or less CLOCK_THREAD_CPUTIME_ID
// fires 200-600% later than its configured time. Therefore it is not granular
@@ -172,7 +176,7 @@ public:
// this issue.
, _timer({signal_number()}, CLOCK_MONOTONIC) {}

virtual ~cpu_profiler_posix_timer() override = default;
virtual void arm_timer(std::chrono::nanoseconds) override;
virtual void disarm_timer() override;
};
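
Not part of this diff: a rough sketch of the raw POSIX calls behind such a timer, to illustrate the clock choice discussed in the comment above. The actual posix_timer wrapper may differ (Seastar targets a specific thread, and error handling is omitted here):

#include <csignal>
#include <ctime>

// Sketch: a signal-delivering sampling timer created against CLOCK_MONOTONIC,
// where CLOCK_THREAD_CPUTIME_ID would otherwise be the natural choice.
timer_t make_sampling_timer(int signo) {
    struct sigevent sev{};
    sev.sigev_notify = SIGEV_SIGNAL;   // deliver a signal when the timer fires
    sev.sigev_signo = signo;
    timer_t t{};
    timer_create(CLOCK_MONOTONIC, &sev, &t);   // not CLOCK_THREAD_CPUTIME_ID, per the comment above
    return t;
}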
@@ -189,7 +193,7 @@ public:
virtual void arm_timer(std::chrono::nanoseconds) override;
virtual void disarm_timer() override;
virtual bool is_spurious_signal() override;
virtual std::optional<linux_perf_event::kernel_backtrace>
try_get_kernel_backtrace() override;
};

24 changes: 13 additions & 11 deletions src/core/cpu_profiler.cc
@@ -19,12 +19,13 @@
* Copyright (C) 2023 ScyllaDB
*/

#include <chrono>
#include <optional>
#include <random>
#include <chrono>
#include <optional>
#include <random>
Member
looks like wrong formatting to me

Member Author
silly clang format, it's trying to match the alignment of the comment above or something, will fix


#include <seastar/core/internal/cpu_profiler.hh>
#include <seastar/util/log.hh>
#include <seastar/core/internal/cpu_profiler.hh>
#include <seastar/core/scheduling.hh>
#include <seastar/util/log.hh>

namespace seastar {
seastar::logger cpu_profiler_logger("cpu_profiler");
@@ -38,7 +39,7 @@ signal_mutex::guard::~guard() {
return;
}
// Ensure the subsequent store isn't hoisted by the
// compiler into the critical section it's intended to
// protect.
std::atomic_signal_fence(std::memory_order_release);
_mutex->_mutex.store(false, std::memory_order_relaxed);
@@ -52,7 +53,7 @@ std::optional<signal_mutex::guard> signal_mutex::try_lock() {
// by the compiler.
std::atomic_signal_fence(std::memory_order_acq_rel);
return {guard(this)};
}

return std::nullopt;
}
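
Not part of this diff, but a condensed sketch of the locking pattern these fences implement. The key assumption, as in the profiler, is that the lock is only ever contended by a signal handler interrupting the same thread (never by another thread running in parallel), so relaxed atomics suffice and the signal fences only constrain compiler reordering:

#include <atomic>

class tiny_signal_mutex {
    std::atomic<bool> _locked{false};
public:
    bool try_lock() {
        if (!_locked.load(std::memory_order_relaxed)) {
            _locked.store(true, std::memory_order_relaxed);
            // keep the protected code from being hoisted above the store
            std::atomic_signal_fence(std::memory_order_acq_rel);
            return true;
        }
        return false;   // already held: the signal interrupted the critical section
    }
    void unlock() {
        // keep the subsequent store from being hoisted into the critical section
        std::atomic_signal_fence(std::memory_order_release);
        _locked.store(false, std::memory_order_relaxed);
    }
};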
@@ -100,7 +101,7 @@ void cpu_profiler::update_config(cpu_profiler_config cfg) {
auto is_stopped = _is_stopped;
stop();
_cfg = cfg;
// Don't start the profiler if it's been explicitly
// stopped elsewhere.
if (!is_stopped) {
start();
@@ -133,7 +134,7 @@ void cpu_profiler::on_signal() {

// During exception handling in libgcc there is a critical section
// where the stack is being modified so execution can be returned to
// a handler for the exception. This modification isn't captured by
// the eh_frames for the program though. So when libgcc's backtrace
// enters the partially modified stack it will follow invalid addresses
// and cause a segfault. To avoid this we check if any exception
@@ -160,6 +161,7 @@
}
_traces.emplace_back();
_traces.back().user_backtrace = current_backtrace_tasklocal();
_traces.back().sg = current_scheduling_group();

auto kernel_bt = try_get_kernel_backtrace();
if (kernel_bt) {
@@ -178,7 +180,7 @@
auto next = get_next_timeout();
arm_timer(next);
}

size_t cpu_profiler::results(std::vector<cpu_profiler_trace>& results_buffer) {
// Since this is not called in the interrupt it should always succeed
// in acquiring the lock.
@@ -209,7 +211,7 @@ bool cpu_profiler_linux_perf_event::is_spurious_signal() {
return _perf_event.is_spurious_signal();
}

std::optional<linux_perf_event::kernel_backtrace>
cpu_profiler_linux_perf_event::try_get_kernel_backtrace() {
return _perf_event.try_get_kernel_backtrace();
}
134 changes: 107 additions & 27 deletions tests/unit/cpu_profiler_test.cc
@@ -19,22 +19,30 @@
* Copyright (C) 2023 ScyllaDB Ltd.
*/

#include <boost/test/tools/old/interface.hpp>
#include <atomic>
#include <chrono>
#include <cstddef>

#include <seastar/core/internal/cpu_profiler.hh>
#include <seastar/core/internal/stall_detector.hh>
#include <seastar/core/loop.hh>
#include <seastar/core/reactor.hh>
#include <seastar/core/scheduling.hh>
#include <seastar/core/thread_cputime_clock.hh>
#include <seastar/core/loop.hh>
#include <seastar/util/later.hh>
#include <seastar/core/with_scheduling_group.hh>
#include <seastar/testing/test_case.hh>
#include <seastar/testing/thread_test_case.hh>
#include <atomic>
#include <chrono>
#include <sys/mman.h>
#include <seastar/util/defer.hh>
#include <seastar/util/later.hh>

#include "stall_detector_test_utilities.hh"

#include <sys/mman.h>

#include <boost/test/tools/old/interface.hpp>

namespace {

struct temporary_profiler_settings {
std::chrono::nanoseconds prev_ns;
bool prev_enabled;
@@ -64,9 +72,37 @@ bool close_to_expected(size_t actual_size, size_t expected_size, double allowed_
auto lower_bound = (1 - allowed_dev) * expected_size;
auto upper_bound = (1 + allowed_dev) * expected_size;

BOOST_TEST_INFO("actual_size: " << actual_size << ", lower_bound " << lower_bound << ", upper_bound " << upper_bound);

return actual_size <= upper_bound && actual_size >= lower_bound;
}

/*
 * Get the current profile results and dropped count. If sg_is_main is true, also validate that
 * the scheduling group associated with each sample is the main (default) group, as we expect
 * unless some scheduling groups have been created explicitly.
 */
std::pair<std::vector<cpu_profiler_trace>, size_t> get_profile_and_dropped(bool sg_is_main = true) {
std::vector<cpu_profiler_trace> results;
auto dropped = engine().profiler_results(results);

if (sg_is_main) {
for (auto& result: results) {
BOOST_CHECK(result.sg == default_scheduling_group());
}
}

return {results, dropped};
}


// get profile and validate results
std::vector<cpu_profiler_trace> get_profile() {
return get_profile_and_dropped().first;
}

}



SEASTAR_THREAD_TEST_CASE(config_case) {
// Ensure that repeatedly configuring the profiler results
// in expected behavior.
@@ -79,14 +115,12 @@ SEASTAR_THREAD_TEST_CASE(config_case) {

spin_some_cooperatively(120*10ms);

std::vector<cpu_profiler_trace> results;
engine().profiler_results(results);
auto results = get_profile();
BOOST_REQUIRE(close_to_expected(results.size(), 12));
}

spin_some_cooperatively(128*10ms);
std::vector<cpu_profiler_trace> results;
engine().profiler_results(results);
auto results = get_profile();
BOOST_REQUIRE_EQUAL(results.size(), 0);
}

@@ -95,21 +129,19 @@ SEASTAR_THREAD_TEST_CASE(simple_case) {

spin_some_cooperatively(120*10ms);

std::vector<cpu_profiler_trace> results;
auto dropped_samples = engine().profiler_results(results);
auto [results, dropped_samples] = get_profile_and_dropped();
BOOST_REQUIRE(close_to_expected(results.size(), 12));
BOOST_REQUIRE_EQUAL(dropped_samples, 0);
}

SEASTAR_THREAD_TEST_CASE(overwrite_case) {
// Ensure that older samples are being overridden in
// the cases where we can't collect results fast enough.
temporary_profiler_settings cp{true, 10ms};

spin_some_cooperatively(256*10ms);

std::vector<cpu_profiler_trace> results;
auto dropped_samples = engine().profiler_results(results);
auto [results, dropped_samples] = get_profile_and_dropped();
// 128 is the maximum number of samples the profiler can
// retain.
BOOST_REQUIRE_EQUAL(results.size(), 128);
@@ -130,8 +162,7 @@ SEASTAR_THREAD_TEST_CASE(mixed_case) {
}

BOOST_REQUIRE_EQUAL(reports, 5);
std::vector<cpu_profiler_trace> results;
engine().profiler_results(results);
auto results = get_profile();
BOOST_REQUIRE(close_to_expected(results.size(), 12));
}

@@ -141,8 +172,7 @@ SEASTAR_THREAD_TEST_CASE(spin_in_kernel) {

spin_some_cooperatively(100ms, [] { mmap_populate(128 * 1024); });

std::vector<cpu_profiler_trace> results;
engine().profiler_results(results);
auto results = get_profile();
int count = 0;
for(auto& result : results) {
if(result.kernel_backtrace.size() > 0){
Expand All @@ -158,7 +188,7 @@ SEASTAR_THREAD_TEST_CASE(spin_in_kernel) {
BOOST_REQUIRE(results.size() > 0);
}

SEASTAR_THREAD_TEST_CASE(signal_mutex_basic) {
// A very basic test that ensures the signal_mutex
// can't be re-locked after it's already been acquired.
internal::signal_mutex mutex;
@@ -222,8 +252,7 @@ SEASTAR_THREAD_TEST_CASE(exception_handler_case) {
random_exception_catcher(100, d(gen));
}

std::vector<cpu_profiler_trace> results;
auto dropped_samples = engine().profiler_results(results);
auto [results, dropped_samples] = get_profile_and_dropped();
BOOST_REQUIRE_EQUAL(results.size(), 128);
BOOST_REQUIRE(dropped_samples > 0);
}
@@ -235,8 +264,7 @@ SEASTAR_THREAD_TEST_CASE(manually_disable) {

spin_some_cooperatively(100ms);

std::vector<cpu_profiler_trace> results;
auto dropped_samples = engine().profiler_results(results);
auto [_, dropped_samples] = get_profile_and_dropped();
BOOST_REQUIRE(dropped_samples > 0);
}

@@ -255,7 +283,59 @@ SEASTAR_THREAD_TEST_CASE(config_thrashing) {
spin_some_cooperatively(1us);
}

std::vector<cpu_profiler_trace> results;
engine().profiler_results(results);
auto results = get_profile();
BOOST_REQUIRE(results.size() > 0);
}

SEASTAR_THREAD_TEST_CASE(scheduling_group_test) {

[[maybe_unused]] auto sg_a = create_scheduling_group("sg_a", 200).get();
[[maybe_unused]] auto sg_b = create_scheduling_group("sg_b", 200).get();

auto destroy_groups = defer([&]() noexcept {
destroy_scheduling_group(sg_b).get();
destroy_scheduling_group(sg_a).get();
});

temporary_profiler_settings cp{true, 100ms};

auto fut_a = with_scheduling_group(sg_a, [] {
return spin_some_cooperatively_coro(2100ms);
});

with_scheduling_group(sg_b, [] {
return spin_some_cooperatively_coro(2100ms);
}).get();

std::move(fut_a).get();

std::vector<cpu_profiler_trace> results;
auto dropped_samples = engine().profiler_results(results);

size_t count_a = 0, count_b = 0, count_main = 0;
for (auto& r : results) {
if (r.sg == sg_a) {
++count_a;
} else if (r.sg == sg_b) {
++count_b;
} else if (r.sg == default_scheduling_group()) {
// this happens when the profiler triggers during non-task
// work, such as in the reactor pollers
++count_main;
} else {
BOOST_TEST_FAIL("unexpected SG: " << r.sg.name());
}
}

// We expect samples for a and b in roughly a 1:1 ratio: the two groups spin
// concurrently for ~2100ms of wall time while the profiler samples every 100ms,
// so the nominal split is about 10/10/0 for a/b/main. We accept a large variance,
// since this is random sampling of two "randomly" scheduled groups and we don't
// have the +/- 1 style guarantee of the single-group case, which comes from the
// way the sampling intervals are calculated. The bounds below are loose to keep
// flakiness to a minimum.
BOOST_CHECK_GT(count_a, 5);
BOOST_CHECK_GT(count_b, 5);
BOOST_CHECK_LT(count_main, 3);
BOOST_CHECK_EQUAL(dropped_samples, 0);
}