Skip to content

Commit d03a004

Browse files
committed
feat: Enable gdb debugging on x86
Enabling GDB support for debugging the guest kernel. This allows us to connect a gdb server to firecracker and debug the guest. Signed-off-by: Jack Thomson <[email protected]>
1 parent 7803c42 commit d03a004

File tree

12 files changed

+1203
-10
lines changed

12 files changed

+1203
-10
lines changed

Cargo.lock

+39
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

resources/seccomp/x86_64-unknown-linux-musl.json

+24
Original file line numberDiff line numberDiff line change
@@ -1238,6 +1238,30 @@
12381238
}
12391239
]
12401240
},
1241+
{
1242+
"syscall": "ioctl",
1243+
"args": [
1244+
{
1245+
"index": 1,
1246+
"type": "dword",
1247+
"op": "eq",
1248+
"val": 1078505115,
1249+
"comment": "KVM_SET_GUEST_DEBUG"
1250+
}
1251+
]
1252+
},
1253+
{
1254+
"syscall": "ioctl",
1255+
"args": [
1256+
{
1257+
"index": 1,
1258+
"type": "dword",
1259+
"op": "eq",
1260+
"val": 3222843013,
1261+
"comment": "KVM_TRANSLATE"
1262+
}
1263+
]
1264+
},
12411265
{
12421266
"syscall": "ioctl",
12431267
"args": [

src/firecracker/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ serde_json = "1.0.128"
4949

5050
[features]
5151
tracing = ["log-instrument", "seccompiler/tracing", "utils/tracing", "vmm/tracing"]
52+
debug = ["vmm/debug"]
5253

5354
[lints]
5455
workspace = true

src/vmm/Cargo.toml

+5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ bench = false
1111
[dependencies]
1212
acpi_tables = { path = "../acpi-tables" }
1313
aes-gcm = { version = "0.10.1", default-features = false, features = ["aes"] }
14+
arrayvec = { version = "0.7.6", optional = true }
1415
aws-lc-rs = { version = "1.9.0", features = ["bindgen"] }
1516
base64 = "0.22.1"
1617
bincode = "1.2.1"
@@ -19,6 +20,8 @@ crc64 = "2.0.0"
1920
derive_more = { version = "1.0.0", default-features = false, features = ["from", "display"] }
2021
displaydoc = "0.2.5"
2122
event-manager = "0.4.0"
23+
gdbstub = { version = "0.7.2", optional = true }
24+
gdbstub_arch = { version = "0.3.0", optional = true }
2225
kvm-bindings = { version = "0.9.1", features = ["fam-wrappers", "serde"] }
2326
kvm-ioctls = "0.18.0"
2427
lazy_static = "1.5.0"
@@ -56,7 +59,9 @@ itertools = "0.13.0"
5659
proptest = { version = "1.5.0", default-features = false, features = ["std"] }
5760

5861
[features]
62+
default = []
5963
tracing = ["log-instrument"]
64+
debug = ["arrayvec", "gdbstub", "gdbstub_arch"]
6065

6166
[[bench]]
6267
name = "cpu_templates"

src/vmm/src/builder.rs

+37-10
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
use std::convert::TryFrom;
88
use std::fmt::Debug;
99
use std::io::{self, Seek, SeekFrom};
10+
#[cfg(feature = "debug")]
11+
use std::sync::mpsc;
1012
use std::sync::{Arc, Mutex};
1113

1214
use event_manager::{MutEventSubscriber, SubscriberOps};
@@ -56,6 +58,8 @@ use crate::devices::virtio::net::Net;
5658
use crate::devices::virtio::rng::Entropy;
5759
use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend};
5860
use crate::devices::BusDevice;
61+
#[cfg(feature = "debug")]
62+
use crate::gdb;
5963
use crate::logger::{debug, error};
6064
use crate::persist::{MicrovmState, MicrovmStateError};
6165
use crate::resources::VmResources;
@@ -128,6 +132,9 @@ pub enum StartMicrovmError {
128132
/// Error configuring ACPI: {0}
129133
#[cfg(target_arch = "x86_64")]
130134
Acpi(#[from] crate::acpi::AcpiError),
135+
/// Error starting GDB debug session
136+
#[cfg(feature = "debug")]
137+
GdbServer(gdb::target::Error),
131138
}
132139

133140
/// It's convenient to automatically convert `linux_loader::cmdline::Error`s
@@ -274,6 +281,15 @@ pub fn build_microvm_for_boot(
274281
cpu_template.kvm_capabilities.clone(),
275282
)?;
276283

284+
#[cfg(feature = "debug")]
285+
let (gdb_tx, gdb_rx) = mpsc::channel();
286+
#[cfg(feature = "debug")]
287+
vcpus
288+
.iter_mut()
289+
.for_each(|vcpu| vcpu.attach_debug_info(gdb_tx.clone()));
290+
#[cfg(feature = "debug")]
291+
let vcpu_fds = vcpus.iter().map(|vcpu| vcpu.copy_kvm_vcpu_fd()).collect();
292+
277293
// The boot timer device needs to be the first device attached in order
278294
// to maintain the same MMIO address referenced in the documentation
279295
// and tests.
@@ -321,16 +337,28 @@ pub fn build_microvm_for_boot(
321337
boot_cmdline,
322338
)?;
323339

340+
let vmm = Arc::new(Mutex::new(vmm));
341+
342+
#[cfg(feature = "debug")]
343+
if let Some(gdb_socket_addr) = &vm_resources.gdb_socket_addr {
344+
gdb::gdb_thread(vmm.clone(), vcpu_fds, gdb_rx, entry_addr, gdb_socket_addr)
345+
.map_err(GdbServer)?;
346+
} else {
347+
debug!("No GDB socket provided not starting gdb server.");
348+
}
349+
324350
// Move vcpus to their own threads and start their state machine in the 'Paused' state.
325-
vmm.start_vcpus(
326-
vcpus,
327-
seccomp_filters
328-
.get("vcpu")
329-
.ok_or_else(|| MissingSeccompFilters("vcpu".to_string()))?
330-
.clone(),
331-
)
332-
.map_err(VmmError::VcpuStart)
333-
.map_err(Internal)?;
351+
vmm.lock()
352+
.unwrap()
353+
.start_vcpus(
354+
vcpus,
355+
seccomp_filters
356+
.get("vcpu")
357+
.ok_or_else(|| MissingSeccompFilters("vcpu".to_string()))?
358+
.clone(),
359+
)
360+
.map_err(VmmError::VcpuStart)
361+
.map_err(Internal)?;
334362

335363
// Load seccomp filters for the VMM thread.
336364
// Execution panics if filters cannot be loaded, use --no-seccomp if skipping filters
@@ -344,7 +372,6 @@ pub fn build_microvm_for_boot(
344372
.map_err(VmmError::SeccompFilters)
345373
.map_err(Internal)?;
346374

347-
let vmm = Arc::new(Mutex::new(vmm));
348375
event_manager.add_subscriber(vmm.clone());
349376

350377
Ok(vmm)

src/vmm/src/gdb/event_loop.rs

+151
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
use std::os::fd::RawFd;
5+
use std::os::unix::net::UnixStream;
6+
use std::sync::mpsc::Receiver;
7+
use std::sync::mpsc::TryRecvError::Empty;
8+
use std::sync::{Arc, Mutex};
9+
10+
use gdbstub::common::{Signal, Tid};
11+
use gdbstub::conn::{Connection, ConnectionExt};
12+
use gdbstub::stub::run_blocking::{self, WaitForStopReasonError};
13+
use gdbstub::stub::{DisconnectReason, GdbStub, MultiThreadStopReason};
14+
use gdbstub::target::Target;
15+
use vm_memory::GuestAddress;
16+
17+
use super::target::{vcpuid_to_tid, Error, FirecrackerTarget};
18+
use crate::logger::trace;
19+
use crate::Vmm;
20+
21+
/// Starts the GDB event loop which acts as a proxy between the Vcpus and GDB
22+
pub fn event_loop(
23+
connection: UnixStream,
24+
vmm: Arc<Mutex<Vmm>>,
25+
vcpu_fds: Vec<RawFd>,
26+
gdb_event_receiver: Receiver<usize>,
27+
entry_addr: GuestAddress,
28+
) {
29+
let target = FirecrackerTarget::new(vmm, vcpu_fds, gdb_event_receiver, entry_addr);
30+
let connection: Box<dyn ConnectionExt<Error = std::io::Error>> = { Box::new(connection) };
31+
let debugger = GdbStub::new(connection);
32+
33+
gdb_event_loop_thread(debugger, target);
34+
}
35+
36+
struct GdbBlockingEventLoop {}
37+
38+
impl run_blocking::BlockingEventLoop for GdbBlockingEventLoop {
39+
type Target = FirecrackerTarget;
40+
type Connection = Box<dyn ConnectionExt<Error = std::io::Error>>;
41+
42+
type StopReason = MultiThreadStopReason<u64>;
43+
44+
/// Poll for events from either Vcpu's or packets from the GDB connection
45+
fn wait_for_stop_reason(
46+
target: &mut FirecrackerTarget,
47+
conn: &mut Self::Connection,
48+
) -> Result<
49+
run_blocking::Event<MultiThreadStopReason<u64>>,
50+
run_blocking::WaitForStopReasonError<
51+
<Self::Target as Target>::Error,
52+
<Self::Connection as Connection>::Error,
53+
>,
54+
> {
55+
loop {
56+
match target.gdb_event.try_recv() {
57+
Ok(cpu_id) => {
58+
// The Vcpu reports it's id from raw_id so we straight convert here
59+
let tid = Tid::new(cpu_id).expect("Error converting cpu id to Tid");
60+
// If notify paused returns false this means we were already debugging a single
61+
// core, the target will track this for us to pick up later
62+
target.update_paused_vcpu(tid);
63+
trace!("Vcpu: {tid:?} paused from debug exit");
64+
65+
let stop_reason = target
66+
.get_stop_reason(tid)
67+
.map_err(WaitForStopReasonError::Target)?;
68+
69+
let Some(stop_response) = stop_reason else {
70+
// If we returned None this is a break which should be handled by
71+
// the guest kernel (e.g. kernel int3 self testing) so we won't notify
72+
// GDB and instead inject this back into the guest
73+
target
74+
.inject_bp_to_guest(tid)
75+
.map_err(WaitForStopReasonError::Target)?;
76+
target
77+
.request_resume(tid)
78+
.map_err(WaitForStopReasonError::Target)?;
79+
80+
trace!("Injected BP into guest early exit");
81+
continue;
82+
};
83+
84+
trace!("Returned stop reason to gdb: {stop_response:?}");
85+
return Ok(run_blocking::Event::TargetStopped(stop_response));
86+
}
87+
Err(Empty) => (),
88+
Err(_) => {
89+
return Err(WaitForStopReasonError::Target(Error::GdbQueueError));
90+
}
91+
}
92+
93+
if conn.peek().map(|b| b.is_some()).unwrap_or(false) {
94+
let byte = conn
95+
.read()
96+
.map_err(run_blocking::WaitForStopReasonError::Connection)?;
97+
return Ok(run_blocking::Event::IncomingData(byte));
98+
}
99+
}
100+
}
101+
102+
/// Invoked when the GDB client sends a Ctrl-C interrupt.
103+
fn on_interrupt(
104+
target: &mut FirecrackerTarget,
105+
) -> Result<Option<MultiThreadStopReason<u64>>, <FirecrackerTarget as Target>::Error> {
106+
// notify the target that a ctrl-c interrupt has occurred.
107+
let main_core = vcpuid_to_tid(0)?;
108+
109+
target.request_pause(main_core)?;
110+
target.update_paused_vcpu(main_core);
111+
112+
let exit_reason = MultiThreadStopReason::SignalWithThread {
113+
tid: main_core,
114+
signal: Signal::SIGINT,
115+
};
116+
Ok(Some(exit_reason))
117+
}
118+
}
119+
120+
/// Runs while communication with GDB is in progress, after GDB disconnects we
121+
/// shutdown firecracker
122+
fn gdb_event_loop_thread(
123+
debugger: GdbStub<FirecrackerTarget, Box<dyn ConnectionExt<Error = std::io::Error>>>,
124+
mut target: FirecrackerTarget,
125+
) {
126+
match debugger.run_blocking::<GdbBlockingEventLoop>(&mut target) {
127+
Ok(disconnect_reason) => match disconnect_reason {
128+
DisconnectReason::Disconnect => {
129+
trace!("Client disconnected")
130+
}
131+
DisconnectReason::TargetExited(code) => {
132+
trace!("Target exited with code {}", code)
133+
}
134+
DisconnectReason::TargetTerminated(sig) => {
135+
trace!("Target terminated with signal {}", sig)
136+
}
137+
DisconnectReason::Kill => trace!("GDB sent a kill command"),
138+
},
139+
Err(e) => {
140+
if e.is_target_error() {
141+
trace!("target encountered a fatal error: {e:?}")
142+
} else if e.is_connection_error() {
143+
trace!("connection error: {e:?}")
144+
} else {
145+
trace!("gdbstub encountered a fatal error {e:?}")
146+
}
147+
}
148+
}
149+
150+
target.shutdown();
151+
}

0 commit comments

Comments
 (0)