Skip to content

Commit e945f32

Browse files
committed
Add eBPF program for controlling device access
This is a simple program which always allows mknod, allows a standard list of devices inside the container, and consults a hashmap mapping devices to their allowed accesses. The hashmap allows runtime updates of whether a device is allowed inside a container. The program is automatically compiled by build.rs.
1 parent 7145210 commit e945f32

File tree

10 files changed

+330
-2
lines changed

10 files changed

+330
-2
lines changed

.github/workflows/ci.yml

+10-1
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,20 @@ jobs:
99
steps:
1010
- uses: actions/checkout@v4
1111

12-
- name: Install dependency
12+
- name: Install apt dependency
1313
run: |
1414
sudo apt-get update
1515
sudo apt-get install -y libudev-dev
1616
17+
# Need to use nightly toolchain for eBPF
18+
- uses: dtolnay/rust-toolchain@nightly
19+
with:
20+
components: rust-src
21+
22+
- name: Install bpf-linker
23+
run: |
24+
cargo install bpf-linker
25+
1726
- name: Build
1827
run: cargo build --release
1928

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
/target
2-
/ott
2+
/cgroup_device_filter/target
3+
/ott

Cargo.lock

+29
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+7
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,10 @@ bollard = "0.16"
3030
futures = "0.3"
3131
rustix = { version = "0.38", features = ["fs", "stdio", "termios"] }
3232
bitflags = "2"
33+
34+
[build-dependencies]
35+
anyhow = { version = "1", features = ["backtrace"] }
36+
walkdir = "2"
37+
38+
[workspace]
39+
exclude = ["cgroup_device_filter"]

build.rs

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
use anyhow::{Context, Result};
2+
3+
fn main() -> Result<()> {
4+
// We need to rerun the build script if any files in the cgroup_device_filter change.
5+
for entry in walkdir::WalkDir::new("cgroup_device_filter")
6+
.into_iter()
7+
.filter_entry(|entry| {
8+
entry
9+
.file_name()
10+
.to_str()
11+
.map(|s| s != "target")
12+
.unwrap_or(true)
13+
})
14+
{
15+
let entry = entry?;
16+
if entry.file_type().is_file() {
17+
println!(
18+
"cargo:rerun-if-changed={}",
19+
entry.path().to_str().context("file name not UTF-8")?
20+
);
21+
}
22+
}
23+
24+
// Run cargo to compile the eBPF program.
25+
let status = std::process::Command::new("cargo")
26+
.current_dir("cgroup_device_filter")
27+
.args(["build", "--release"])
28+
.status()?;
29+
30+
if !status.success() {
31+
anyhow::bail!("Failed to build eBPF program");
32+
}
33+
34+
Ok(())
35+
}
+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[build]
2+
target = "bpfel-unknown-none"
3+
4+
[unstable]
5+
build-std = ["core"]

cgroup_device_filter/Cargo.lock

+115
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cgroup_device_filter/Cargo.toml

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[package]
2+
name = "cgroup_device_filter"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[dependencies]
7+
aya-bpf = { git = "https://github.com/aya-rs/aya.git" }
+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[toolchain]
2+
channel = "nightly"

cgroup_device_filter/src/main.rs

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
#![no_std]
2+
#![no_main]
3+
4+
use aya_bpf::bindings::{
5+
BPF_DEVCG_ACC_MKNOD, BPF_DEVCG_DEV_BLOCK, BPF_DEVCG_DEV_CHAR, BPF_F_NO_PREALLOC,
6+
};
7+
use aya_bpf::macros::{cgroup_device, map};
8+
use aya_bpf::maps::HashMap;
9+
use aya_bpf::programs::DeviceContext;
10+
11+
/// Identifies a device node by type and major/minor number.
///
/// Used as the key type of the `DEVICE_PERM` map, which is modified from
/// user-space; `#[repr(C)]` gives the struct a defined field layout.
#[derive(Copy, Clone, Eq, PartialEq)]
#[repr(C)]
struct Device {
    /// Type of device. BPF_DEVCG_DEV_BLOCK or BPF_DEVCG_DEV_CHAR.
    ty: u32,
    /// Major device number.
    major: u32,
    /// Minor device number.
    minor: u32,
}
19+
20+
const DEV_NULL: Device = Device {
21+
ty: BPF_DEVCG_DEV_CHAR,
22+
major: 1,
23+
minor: 3,
24+
};
25+
26+
const DEV_ZERO: Device = Device {
27+
ty: BPF_DEVCG_DEV_CHAR,
28+
major: 1,
29+
minor: 5,
30+
};
31+
32+
const DEV_FULL: Device = Device {
33+
ty: BPF_DEVCG_DEV_CHAR,
34+
major: 1,
35+
minor: 7,
36+
};
37+
38+
const DEV_RANDOM: Device = Device {
39+
ty: BPF_DEVCG_DEV_CHAR,
40+
major: 1,
41+
minor: 8,
42+
};
43+
44+
const DEV_URANDOM: Device = Device {
45+
ty: BPF_DEVCG_DEV_CHAR,
46+
major: 1,
47+
minor: 9,
48+
};
49+
50+
const DEV_TTY: Device = Device {
51+
ty: BPF_DEVCG_DEV_CHAR,
52+
major: 5,
53+
minor: 0,
54+
};
55+
56+
const DEV_CONSOLE: Device = Device {
57+
ty: BPF_DEVCG_DEV_CHAR,
58+
major: 5,
59+
minor: 1,
60+
};
61+
62+
const DEV_PTMX: Device = Device {
63+
ty: BPF_DEVCG_DEV_CHAR,
64+
major: 5,
65+
minor: 2,
66+
};
67+
68+
#[map(name = "DEVICE_PERM")]
69+
/// Hashmap storing a device -> permission mapping.
70+
///
71+
/// This is modified from user-space to change permission.
72+
static DEVICE_PERM: HashMap<Device, u32> = HashMap::with_max_entries(256, BPF_F_NO_PREALLOC);
73+
74+
#[cgroup_device]
75+
fn check_device(ctx: DeviceContext) -> i32 {
76+
// SAFETY: This is a POD supplied by the kernel.
77+
let ctx_dev = unsafe { *ctx.device };
78+
let dev = Device {
79+
// access_type's lower 16 bits are the device type, upper 16 bits are the access type.
80+
ty: ctx_dev.access_type & 0xFFFF,
81+
major: ctx_dev.major,
82+
minor: ctx_dev.minor,
83+
};
84+
let access = ctx_dev.access_type >> 16;
85+
86+
// Always allow mknod, we restrict on access not on creation.
87+
// This is consistent with eBPF genereated by Docker.
88+
if matches!(dev.ty, BPF_DEVCG_DEV_BLOCK | BPF_DEVCG_DEV_CHAR) && access == BPF_DEVCG_ACC_MKNOD {
89+
return 1;
90+
}
91+
92+
// Allow default devices for containers
93+
// https://github.com/opencontainers/runtime-spec/blob/main/config-linux.md
94+
match dev {
95+
DEV_NULL | DEV_ZERO | DEV_FULL | DEV_RANDOM | DEV_URANDOM => return 1,
96+
DEV_TTY | DEV_CONSOLE | DEV_PTMX => return 1,
97+
// Pseudo-PTY
98+
Device {
99+
ty: BPF_DEVCG_DEV_CHAR,
100+
major: 136,
101+
minor: _,
102+
} => return 1,
103+
_ => (),
104+
}
105+
106+
// For extra devices, check the map.
107+
// SAFETY: we have BPF_F_NO_PREALLOC enabled so the map is safe to access concurrently.
108+
let device_perm = unsafe { DEVICE_PERM.get(&dev).copied() };
109+
match device_perm {
110+
Some(perm) => (perm & access == access) as i32,
111+
None => 0,
112+
}
113+
}
114+
115+
#[panic_handler]
116+
fn panic(_info: &core::panic::PanicInfo) -> ! {
117+
loop {}
118+
}

0 commit comments

Comments
 (0)