diff --git a/include/seastar/core/internal/cpu_profiler.hh b/include/seastar/core/internal/cpu_profiler.hh
index 67047c3d995..d7a58af4b74 100644
--- a/include/seastar/core/internal/cpu_profiler.hh
+++ b/include/seastar/core/internal/cpu_profiler.hh
@@ -21,16 +21,17 @@
 
 #pragma once
 
+#include <seastar/util/backtrace.hh>
 #include <seastar/core/circular_buffer_fixed_capacity.hh>
 #include <seastar/core/internal/timers.hh>
-#include <seastar/util/backtrace.hh>
+#include <seastar/core/scheduling.hh>
 
 #include <boost/container/static_vector.hpp>
+
 #include <chrono>
 #include <csignal>
 #include <cstdint>
 #include <ctime>
-#include <mutex>
 #include <atomic>
 #include <optional>
 
@@ -42,6 +43,9 @@ struct cpu_profiler_trace {
     using kernel_trace_vec = boost::container::static_vector<uintptr_t, 64>;
     simple_backtrace user_backtrace;
     kernel_trace_vec kernel_backtrace;
+    // The scheduling group active at the time the same was taken. Note that
+    // non-task reactor work (such as polling)
+    scheduling_group sg;
 };
 
 constexpr size_t max_number_of_traces = 128;
@@ -72,7 +76,7 @@ private:
 };
 
 // Temporarily enable/disable the CPU profiler from taking stacktraces on this thread,
-// but don't disable the profiler completely. This can be used disable the profiler 
+// but don't disable the profiler completely. This can be used disable the profiler
 // for cases when taking a backtrace isn't valid (IE JIT generated code).
 void profiler_drop_stacktraces(bool) noexcept;
 
@@ -108,7 +112,7 @@ struct cpu_profiler_stats {
     }
 
     unsigned sum_dropped() const {
-        return dropped_samples_from_manual_disablement 
+        return dropped_samples_from_manual_disablement
             + dropped_samples_from_buffer_full
             + dropped_samples_from_exceptions
             + dropped_samples_from_mutex_contention;
@@ -155,7 +159,7 @@ public:
     virtual void arm_timer(std::chrono::nanoseconds) = 0;
     virtual void disarm_timer() = 0;
     virtual bool is_spurious_signal() { return false; }
-    virtual std::optional<linux_perf_event::kernel_backtrace> 
+    virtual std::optional<linux_perf_event::kernel_backtrace>
     try_get_kernel_backtrace() { return std::nullopt; }
 };
 
@@ -163,7 +167,7 @@ class cpu_profiler_posix_timer : public cpu_profiler {
     posix_timer _timer;
 public:
     cpu_profiler_posix_timer(cpu_profiler_config cfg)
-            : cpu_profiler(cfg) 
+            : cpu_profiler(cfg)
             // CLOCK_MONOTONIC is used here in place of CLOCK_THREAD_CPUTIME_ID.
             // This is since for intervals of ~5ms or less CLOCK_THREAD_CPUTIME_ID
             // fires 200-600% after it's configured time. Therefore it is not granular
@@ -172,7 +176,7 @@ public:
             // this issue.
             , _timer({signal_number()}, CLOCK_MONOTONIC) {}
 
-    virtual ~cpu_profiler_posix_timer() override = default; 
+    virtual ~cpu_profiler_posix_timer() override = default;
     virtual void arm_timer(std::chrono::nanoseconds) override;
     virtual void disarm_timer() override;
 };
@@ -189,7 +193,7 @@ public:
     virtual void arm_timer(std::chrono::nanoseconds) override;
     virtual void disarm_timer() override;
     virtual bool is_spurious_signal() override;
-    virtual std::optional<linux_perf_event::kernel_backtrace> 
+    virtual std::optional<linux_perf_event::kernel_backtrace>
     try_get_kernel_backtrace() override;
 };
 
diff --git a/src/core/cpu_profiler.cc b/src/core/cpu_profiler.cc
index d020dc5dc9b..612f420a169 100644
--- a/src/core/cpu_profiler.cc
+++ b/src/core/cpu_profiler.cc
@@ -19,12 +19,13 @@
  * Copyright (C) 2023 ScyllaDB
  */
 
-#include <chrono>
-#include <optional>
-#include <random>
+ #include <chrono>
+ #include <optional>
+ #include <random>
 
-#include <seastar/core/internal/cpu_profiler.hh>
-#include <seastar/util/log.hh>
+ #include <seastar/core/internal/cpu_profiler.hh>
+ #include <seastar/core/scheduling.hh>
+ #include <seastar/util/log.hh>
 
 namespace seastar {
 seastar::logger cpu_profiler_logger("cpu_profiler");
@@ -38,7 +39,7 @@ signal_mutex::guard::~guard() {
         return;
     }
     // Ensure the subsequent store isn't hoisted by the the
-    // compiler into the critical section it's intended to 
+    // compiler into the critical section it's intended to
     // protect.
     std::atomic_signal_fence(std::memory_order_release);
     _mutex->_mutex.store(false, std::memory_order_relaxed);
@@ -52,7 +53,7 @@ std::optional<signal_mutex::guard> signal_mutex::try_lock() {
         // by the compiler.
         std::atomic_signal_fence(std::memory_order_acq_rel);
         return {guard(this)};
-    } 
+    }
 
     return std::nullopt;
 }
@@ -100,7 +101,7 @@ void cpu_profiler::update_config(cpu_profiler_config cfg) {
     auto is_stopped = _is_stopped;
     stop();
     _cfg = cfg;
-    // Don't start the profiler if it's been explicitly 
+    // Don't start the profiler if it's been explicitly
     // stopped elsewhere.
     if (!is_stopped) {
         start();
@@ -133,7 +134,7 @@ void cpu_profiler::on_signal() {
 
     // During exception handling in libgcc there is a critical section
     // where the stack is being modified so execution can be returned to
-    // a handler for the exception. This modification isn't capture by 
+    // a handler for the exception. This modification isn't capture by
     // the eh_frames for the program though. So when libgcc's backtrace
     // enters the partially modified stack it will follow invalid addresses
     // and cause a segfault. To avoid this we check if any exception
@@ -160,6 +161,7 @@ void cpu_profiler::on_signal() {
         }
         _traces.emplace_back();
         _traces.back().user_backtrace = current_backtrace_tasklocal();
+        _traces.back().sg = current_scheduling_group();
 
         auto kernel_bt = try_get_kernel_backtrace();
         if (kernel_bt) {
@@ -178,7 +180,7 @@ void cpu_profiler::on_signal() {
     auto next = get_next_timeout();
     arm_timer(next);
 }
- 
+
 size_t cpu_profiler::results(std::vector<cpu_profiler_trace>& results_buffer) {
     // Since is this not called in the interrupt it should always succeed
     // in acquiring the lock.
@@ -209,7 +211,7 @@ bool cpu_profiler_linux_perf_event::is_spurious_signal() {
     return _perf_event.is_spurious_signal();
 }
 
-std::optional<linux_perf_event::kernel_backtrace> 
+std::optional<linux_perf_event::kernel_backtrace>
 cpu_profiler_linux_perf_event::try_get_kernel_backtrace() {
     return _perf_event.try_get_kernel_backtrace();
 }
diff --git a/tests/unit/cpu_profiler_test.cc b/tests/unit/cpu_profiler_test.cc
index 4206961b2bb..03a8ed0a201 100644
--- a/tests/unit/cpu_profiler_test.cc
+++ b/tests/unit/cpu_profiler_test.cc
@@ -19,22 +19,30 @@
  * Copyright (C) 2023 ScyllaDB Ltd.
  */
 
-#include <boost/test/tools/old/interface.hpp>
+#include <atomic>
+#include <chrono>
 #include <cstddef>
+
 #include <seastar/core/internal/cpu_profiler.hh>
 #include <seastar/core/internal/stall_detector.hh>
+#include <seastar/core/loop.hh>
 #include <seastar/core/reactor.hh>
+#include <seastar/core/scheduling.hh>
 #include <seastar/core/thread_cputime_clock.hh>
-#include <seastar/core/loop.hh>
-#include <seastar/util/later.hh>
+#include <seastar/core/with_scheduling_group.hh>
 #include <seastar/testing/test_case.hh>
 #include <seastar/testing/thread_test_case.hh>
-#include <atomic>
-#include <chrono>
-#include <sys/mman.h>
+#include <seastar/util/defer.hh>
+#include <seastar/util/later.hh>
 
 #include "stall_detector_test_utilities.hh"
 
+#include <sys/mman.h>
+
+#include <boost/test/tools/old/interface.hpp>
+
+namespace {
+
 struct temporary_profiler_settings {
     std::chrono::nanoseconds prev_ns;
     bool prev_enabled;
@@ -64,9 +72,37 @@ bool close_to_expected(size_t actual_size, size_t expected_size, double allowed_
     auto lower_bound = (1 - allowed_dev) * expected_size;
     auto upper_bound = (1 + allowed_dev) * expected_size;
 
+    BOOST_TEST_INFO("actual_size: " << actual_size << ", lower_bound " << lower_bound << ", upper_bound " << upper_bound);
+
     return actual_size <= upper_bound && actual_size >= lower_bound;
 }
 
+/*
+ * Get the current profile results and dropped count. If sg_in_main is true, also validates that
+ * the sg associated with the profile is always main, as we expect unless some SG have been
+ * created explicitly.
+ */
+std::pair<std::vector<cpu_profiler_trace>, size_t> get_profile_and_dropped(bool sg_is_main = true) {
+    std::vector<cpu_profiler_trace> results;
+    auto dropped = engine().profiler_results(results);
+
+    for (auto& result: results) {
+        BOOST_CHECK(result.sg == default_scheduling_group());
+    }
+
+    return {results, dropped};
+}
+
+
+// get profile and validate results
+std::vector<cpu_profiler_trace> get_profile() {
+    return get_profile_and_dropped().first;
+}
+
+}
+
+
+
 SEASTAR_THREAD_TEST_CASE(config_case) {
     // Ensure that repeatedly configuring the profiler results
     // in expected behavior.
@@ -79,14 +115,12 @@ SEASTAR_THREAD_TEST_CASE(config_case) {
 
         spin_some_cooperatively(120*10ms);
 
-        std::vector<cpu_profiler_trace> results;
-        engine().profiler_results(results); 
+        auto results = get_profile();
         BOOST_REQUIRE(close_to_expected(results.size(), 12));
     }
-    
+
     spin_some_cooperatively(128*10ms);
-    std::vector<cpu_profiler_trace> results;
-    engine().profiler_results(results); 
+    auto results = get_profile();
     BOOST_REQUIRE_EQUAL(results.size(), 0);
 }
 
@@ -95,21 +129,19 @@ SEASTAR_THREAD_TEST_CASE(simple_case) {
 
     spin_some_cooperatively(120*10ms);
 
-    std::vector<cpu_profiler_trace> results;
-    auto dropped_samples = engine().profiler_results(results); 
+    auto [results, dropped_samples] = get_profile_and_dropped();
     BOOST_REQUIRE(close_to_expected(results.size(), 12));
     BOOST_REQUIRE_EQUAL(dropped_samples, 0);
 }
 
 SEASTAR_THREAD_TEST_CASE(overwrite_case) {
-    // Ensure that older samples are being overridden in 
+    // Ensure that older samples are being overridden in
     // the cases where we can't collect results fast enough.
     temporary_profiler_settings cp{true, 10ms};
 
     spin_some_cooperatively(256*10ms);
 
-    std::vector<cpu_profiler_trace> results;
-    auto dropped_samples = engine().profiler_results(results); 
+    auto [results, dropped_samples] = get_profile_and_dropped();
     // 128 is the maximum number of samples the profiler can
     // retain.
     BOOST_REQUIRE_EQUAL(results.size(), 128);
@@ -130,8 +162,7 @@ SEASTAR_THREAD_TEST_CASE(mixed_case) {
     }
 
     BOOST_REQUIRE_EQUAL(reports, 5);
-    std::vector<cpu_profiler_trace> results;
-    engine().profiler_results(results); 
+    auto results = get_profile();
     BOOST_REQUIRE(close_to_expected(results.size(), 12));
 }
 
@@ -141,8 +172,7 @@ SEASTAR_THREAD_TEST_CASE(spin_in_kernel) {
 
     spin_some_cooperatively(100ms, [] { mmap_populate(128 * 1024); });
 
-    std::vector<cpu_profiler_trace> results;
-    engine().profiler_results(results); 
+    auto results = get_profile();
     int count = 0;
     for(auto& result : results) {
         if(result.kernel_backtrace.size() > 0){
@@ -158,7 +188,7 @@ SEASTAR_THREAD_TEST_CASE(spin_in_kernel) {
     BOOST_REQUIRE(results.size() > 0);
 }
 
-SEASTAR_THREAD_TEST_CASE(signal_mutex_basic) { 
+SEASTAR_THREAD_TEST_CASE(signal_mutex_basic) {
     // A very basic test that ensures the signal_mutex
     // can't be re-locked after it's already been acquired.
     internal::signal_mutex mutex;
@@ -222,8 +252,7 @@ SEASTAR_THREAD_TEST_CASE(exception_handler_case) {
     random_exception_catcher(100, d(gen));
   }
 
-  std::vector<cpu_profiler_trace> results;
-  auto dropped_samples = engine().profiler_results(results);
+  auto [results, dropped_samples] = get_profile_and_dropped();
   BOOST_REQUIRE_EQUAL(results.size(), 128);
   BOOST_REQUIRE(dropped_samples > 0);
 }
@@ -235,8 +264,7 @@ SEASTAR_THREAD_TEST_CASE(manually_disable) {
 
   spin_some_cooperatively(100ms);
 
-  std::vector<cpu_profiler_trace> results;
-  auto dropped_samples = engine().profiler_results(results);
+  auto [_, dropped_samples] = get_profile_and_dropped();
   BOOST_REQUIRE(dropped_samples > 0);
 }
 
@@ -255,7 +283,59 @@ SEASTAR_THREAD_TEST_CASE(config_thrashing) {
     spin_some_cooperatively(1us);
   }
 
-  std::vector<cpu_profiler_trace> results;
-  engine().profiler_results(results);
+  auto results = get_profile();
   BOOST_REQUIRE(results.size() > 0);
 }
+
+SEASTAR_THREAD_TEST_CASE(scheduling_group_test) {
+
+    [[maybe_unused]] auto sg_a = create_scheduling_group("sg_a", 200).get();
+    [[maybe_unused]] auto sg_b = create_scheduling_group("sg_b", 200).get();
+
+    auto destoy_groups = defer([&]() noexcept {
+        destroy_scheduling_group(sg_b).get();
+        destroy_scheduling_group(sg_a).get();
+    });
+
+    temporary_profiler_settings cp{true, 100ms};
+
+    auto fut_a = with_scheduling_group(sg_a, [] {
+        return spin_some_cooperatively_coro(2100ms);
+    });
+
+    with_scheduling_group(sg_b, [] {
+        return spin_some_cooperatively_coro(2100ms);
+    }).get();
+
+    std::move(fut_a).get();
+
+    std::vector<cpu_profiler_trace> results;
+    auto dropped_samples = engine().profiler_results(results);
+
+    size_t count_a = 0, count_b = 0, count_main = 0;
+    for (auto& r : results) {
+        if (r.sg == sg_a) {
+            ++count_a;
+        } else if (r.sg == sg_b) {
+            ++count_b;
+        } else if (r.sg == default_scheduling_group()) {
+            // this happens when the profiler triggers during non-task
+            // work, such as in the reactor pollers
+            ++count_main;
+        } else {
+            BOOST_TEST_FAIL("unexpected SG: " << r.sg.name());
+        }
+    }
+
+    // We expect a and b to be a 1:1 ratio, though we accept large
+    // variance since this is random sampling of two "randomly" scheduled
+    // groups so we don't really have the same guarantees we do in the
+    // single group case where we expect sort of +/- 1 due to the way we
+    // calculate the sampling intervals.
+    // Nominally the split is 10/10/0 for a/b/main, but we accept the
+    // below to keep flakiness to a minimum.
+    BOOST_CHECK_GT(count_a, 5);
+    BOOST_CHECK_GT(count_b, 5);
+    BOOST_CHECK_LT(count_main, 3);
+    BOOST_CHECK_EQUAL(dropped_samples, 0);
+}
diff --git a/tests/unit/stall_detector_test_utilities.hh b/tests/unit/stall_detector_test_utilities.hh
index d42eff14805..d7e0cbf03e6 100644
--- a/tests/unit/stall_detector_test_utilities.hh
+++ b/tests/unit/stall_detector_test_utilities.hh
@@ -24,6 +24,9 @@
 #include <cstddef>
 #include <seastar/core/internal/stall_detector.hh>
 #include <seastar/core/reactor.hh>
+#include "seastar/core/scheduling.hh"
+#include "seastar/core/thread.hh"
+#include "seastar/coroutine/maybe_yield.hh"
 #include <seastar/core/thread_cputime_clock.hh>
 #include <seastar/core/loop.hh>
 #include <seastar/util/later.hh>
@@ -88,6 +91,15 @@ void spin_some_cooperatively(std::chrono::duration<double> how_much, void_fn bod
     }
 }
 
+future<> spin_some_cooperatively_coro(std::chrono::duration<double> how_much, void_fn body = []{}) {
+    auto end = std::chrono::steady_clock::now() + how_much;
+    while (std::chrono::steady_clock::now() < end) {
+        // fmt::print("GC: {}\n", current_scheduling_group().name());
+        spin(200us, body);
+        co_await coroutine::maybe_yield();
+    }
+}
+
 
 // Triggers stalls by spinning with a specify "body" function
 // which takes most of the spin time.