
Commit 31d8a2d

tcproute: add support for round-robin load balancing
Add support for round-robin load balancing for `TCPRoute`. This enables multiple backend references in a TCPRoute object, with traffic being distributed to each backend in round-robin fashion. A new BPF map `TCP_CONNECTIONS` was introduced to keep track of active TCP connections, with its key storing the client <ip:port> identifier and its value storing the backend's <ip:port> along with the state of the connection.

Signed-off-by: Sanskar Jaiswal <[email protected]>
1 parent dc1a070 commit 31d8a2d
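
The `TCP_CONNECTIONS` map referenced throughout this commit is declared in the eBPF entrypoint, one of the nine changed files not shown in this excerpt. A minimal sketch of what that declaration plausibly looks like with aya-bpf, using the `ClientKey` and `TCPBackend` types added below; the max-entries capacity is an assumed value for illustration only:

    use aya_bpf::{macros::map, maps::HashMap};
    use common::{ClientKey, TCPBackend};

    // Tracks active TCP connections: client <ip:port> -> chosen backend plus
    // connection state. The capacity of 128 entries is illustrative only.
    #[map(name = "TCP_CONNECTIONS")]
    static mut TCP_CONNECTIONS: HashMap<ClientKey, TCPBackend> =
        HashMap::<ClientKey, TCPBackend>::with_max_entries(128, 0);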

File tree: 9 files changed (+366, −91 lines)

dataplane/api-server/src/lib.rs
Lines changed: 3 additions & 2 deletions

@@ -15,15 +15,16 @@ use aya::maps::{HashMap, MapData};
 use tonic::transport::Server;
 
 use backends::backends_server::BackendsServer;
-use common::{BackendKey, BackendList};
+use common::{BackendKey, BackendList, ClientKey, TCPBackend};
 
 pub async fn start(
     addr: Ipv4Addr,
     port: u16,
     backends_map: HashMap<MapData, BackendKey, BackendList>,
     gateway_indexes_map: HashMap<MapData, BackendKey, u16>,
+    tcp_conns_map: HashMap<MapData, ClientKey, TCPBackend>,
 ) -> Result<(), Error> {
-    let server = server::BackendService::new(backends_map, gateway_indexes_map);
+    let server = server::BackendService::new(backends_map, gateway_indexes_map, tcp_conns_map);
     // TODO: mTLS https://github.com/Kong/blixt/issues/50
     Server::builder()
         .add_service(BackendsServer::new(server))
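
With the extra parameter, callers of `start` now hand over three maps. A hedged sketch of the wiring from the dataplane binary, assuming aya's userspace loader API; the `bpf` handle and the map name strings are illustrative assumptions, not part of this diff:

    // Hypothetical caller, e.g. the dataplane's main(); map names are assumed.
    let backends_map = HashMap::try_from(bpf.take_map("BACKENDS").unwrap())?;
    let gateway_indexes_map = HashMap::try_from(bpf.take_map("GATEWAY_INDEXES").unwrap())?;
    let tcp_conns_map = HashMap::try_from(bpf.take_map("TCP_CONNECTIONS").unwrap())?;
    api_server::start(addr, port, backends_map, gateway_indexes_map, tcp_conns_map).await?;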

dataplane/api-server/src/server.rs
Lines changed: 32 additions & 3 deletions

@@ -15,21 +15,24 @@ use tonic::{Request, Response, Status};
 use crate::backends::backends_server::Backends;
 use crate::backends::{Confirmation, InterfaceIndexConfirmation, PodIp, Targets, Vip};
 use crate::netutils::{if_name_for_routing_ip, if_nametoindex};
-use common::{Backend, BackendKey, BackendList, BACKENDS_ARRAY_CAPACITY};
+use common::{Backend, BackendKey, BackendList, ClientKey, TCPBackend, BACKENDS_ARRAY_CAPACITY};
 
 pub struct BackendService {
     backends_map: Arc<Mutex<HashMap<MapData, BackendKey, BackendList>>>,
     gateway_indexes_map: Arc<Mutex<HashMap<MapData, BackendKey, u16>>>,
+    tcp_conns_map: Arc<Mutex<HashMap<MapData, ClientKey, TCPBackend>>>,
 }
 
 impl BackendService {
     pub fn new(
         backends_map: HashMap<MapData, BackendKey, BackendList>,
         gateway_indexes_map: HashMap<MapData, BackendKey, u16>,
+        tcp_conns_map: HashMap<MapData, ClientKey, TCPBackend>,
     ) -> BackendService {
         BackendService {
             backends_map: Arc::new(Mutex::new(backends_map)),
             gateway_indexes_map: Arc::new(Mutex::new(gateway_indexes_map)),
+            tcp_conns_map: Arc::new(Mutex::new(tcp_conns_map)),
         }
     }
 
@@ -51,6 +54,31 @@ impl BackendService {
         backends_map.remove(&key)?;
         let mut gateway_indexes_map = self.gateway_indexes_map.lock().await;
         gateway_indexes_map.remove(&key)?;
+
+        // Delete all entries in our TCP connection tracking map that were
+        // related to this backend key. This is needed because the TCPRoute
+        // might have been deleted while TCP connections were still active,
+        // and without the logic below those entries would hang around forever.
+        // It's better to do this than to maintain a reverse index, because the
+        // index would need updating on every new connection, while delete is a
+        // far less frequent operation.
+        let mut tcp_conns_map = self.tcp_conns_map.lock().await;
+        let mut expired_client_keys: Vec<ClientKey> = vec![];
+        tcp_conns_map.iter().for_each(|val| {
+            if let Ok((client_key, tcp_backend)) = val {
+                let bk = BackendKey {
+                    ip: u32::from_be(key.ip),
+                    port: u16::from_be(key.port as u16) as u32,
+                };
+                if tcp_backend.backend_key == bk {
+                    expired_client_keys.push(client_key);
+                }
+            }
+        });
+
+        for ck in expired_client_keys {
+            tcp_conns_map.remove(&ck)?;
+        }
         Ok(())
     }
 }

@@ -144,9 +172,10 @@ impl Backends for BackendService {
         match self.insert_and_reset_index(key, backend_list).await {
             Ok(_) => Ok(Response::new(Confirmation {
                 confirmation: format!(
-                    "success, vip {}:{} was updated",
+                    "success, vip {}:{} was updated with {} backends",
                     Ipv4Addr::from(vip.ip),
-                    vip.port
+                    vip.port,
+                    count,
                 ),
             })),
             Err(err) => Err(Status::internal(format!("failure: {}", err))),

dataplane/common/src/lib.rs
Lines changed: 37 additions & 1 deletion

@@ -20,7 +20,7 @@ pub struct Backend {
 #[cfg(feature = "user")]
 unsafe impl aya::Pod for Backend {}
 
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
 #[repr(C)]
 pub struct BackendKey {
     pub ip: u32,

@@ -40,3 +40,39 @@ pub struct BackendList {
 
 #[cfg(feature = "user")]
 unsafe impl aya::Pod for BackendList {}
+
+#[derive(Copy, Clone, Debug)]
+#[repr(C)]
+pub struct ClientKey {
+    pub ip: u32,
+    pub port: u32,
+}
+
+#[cfg(feature = "user")]
+unsafe impl aya::Pod for ClientKey {}
+
+#[derive(Copy, Clone, Debug, Default)]
+#[repr(C)]
+pub enum TCPState {
+    #[default]
+    Established,
+    FinWait1,
+    FinWait2,
+    Closing,
+    TimeWait,
+    Closed,
+}
+
+#[cfg(feature = "user")]
+unsafe impl aya::Pod for TCPState {}
+
+#[derive(Copy, Clone, Debug)]
+#[repr(C)]
+pub struct TCPBackend {
+    pub backend: Backend,
+    pub backend_key: BackendKey,
+    pub state: TCPState,
+}
+
+#[cfg(feature = "user")]
+unsafe impl aya::Pod for TCPBackend {}
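
These types cross the kernel/userspace boundary, hence `#[repr(C)]` for a stable layout and the `aya::Pod` impls (behind the `user` feature) so aya can copy them in and out of maps as plain bytes. A small illustrative lookup from the userspace side, with invented address and port values:

    use std::net::Ipv4Addr;
    use common::{ClientKey, TCPBackend};

    // Illustrative only: the address and port are made up for the example.
    let client = ClientKey {
        ip: u32::from(Ipv4Addr::new(10, 0, 0, 7)),
        port: 40021,
    };
    // aya's userspace HashMap::get takes the key plus a flags argument.
    let entry: TCPBackend = tcp_conns_map.get(&client, 0)?;
    println!("client tracked, connection state: {:?}", entry.state);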

dataplane/ebpf/src/egress/tcp.rs
Lines changed: 49 additions & 13 deletions

@@ -12,11 +12,12 @@ use aya_bpf::{
     programs::TcContext,
 };
 use aya_log_ebpf::info;
+use common::{ClientKey, TCPBackend, TCPState};
 use network_types::{eth::EthHdr, ip::Ipv4Hdr, tcp::TcpHdr};
 
 use crate::{
-    utils::{csum_fold_helper, ptr_at},
-    BLIXT_CONNTRACK,
+    utils::{csum_fold_helper, handle_tcp_conn_close, ptr_at},
+    TCP_CONNECTIONS,
 };
 
 pub fn handle_tcp_egress(ctx: TcContext) -> Result<i32, i64> {

@@ -29,24 +30,26 @@ pub fn handle_tcp_egress(ctx: TcContext) -> Result<i32, i64> {
 
     // capture some IP and port information
     let client_addr = unsafe { (*ip_hdr).dst_addr };
-    let dest_port = unsafe { (*tcp_hdr).dest.to_be() };
-    let ip_port_tuple = unsafe { BLIXT_CONNTRACK.get(&client_addr) }.ok_or(TC_ACT_PIPE)?;
-
-    // verify traffic destination
-    if ip_port_tuple.1 as u16 != dest_port {
-        return Ok(TC_ACT_PIPE);
-    }
+    let dest_port = unsafe { (*tcp_hdr).dest };
+    // The source identifier
+    let client_key = ClientKey {
+        ip: u32::from_be(client_addr),
+        port: u16::from_be(dest_port) as u32,
+    };
+    let tcp_backend = unsafe { TCP_CONNECTIONS.get(&client_key) }.ok_or(TC_ACT_PIPE)?;
 
     info!(
         &ctx,
-        "Received TCP packet destined for tracked IP {:i}:{} setting source IP to VIP {:i}",
+        "Received TCP packet destined for tracked IP {:i}:{} setting source IP to VIP {:i}:{}",
         u32::from_be(client_addr),
-        ip_port_tuple.1 as u16,
-        u32::from_be(ip_port_tuple.0),
+        u16::from_be(dest_port),
+        tcp_backend.backend_key.ip,
+        tcp_backend.backend_key.port,
    );
 
+    // SNAT the ip address
     unsafe {
-        (*ip_hdr).src_addr = ip_port_tuple.0;
+        (*ip_hdr).src_addr = tcp_backend.backend_key.ip;
     };
 
     if (ctx.data() + EthHdr::LEN + Ipv4Hdr::LEN) > ctx.data_end() {

@@ -68,6 +71,39 @@ pub fn handle_tcp_egress(ctx: TcContext) -> Result<i32, i64> {
     unsafe { (*tcp_hdr).check = 0 };
 
     // TODO: connection tracking cleanup https://github.com/kubernetes-sigs/blixt/issues/85
+    // SNAT the port
+    unsafe { (*tcp_hdr).source = tcp_backend.backend_key.port as u16 };
+
+    let tcp_hdr_ref = unsafe { tcp_hdr.as_ref().ok_or(TC_ACT_OK)? };
+
+    // If the packet has the RST flag set, it means the connection is being terminated, so remove it
+    // from our map.
+    if tcp_hdr_ref.rst() == 1 {
+        unsafe {
+            TCP_CONNECTIONS.remove(&client_key)?;
+        }
+    }
+
+    let mut tcp_state = tcp_backend.state;
+    let moved = handle_tcp_conn_close(tcp_hdr_ref, &mut tcp_state);
+    // If the connection has moved to the Closed state, stop tracking it.
+    if let TCPState::Closed = tcp_state {
+        unsafe {
+            TCP_CONNECTIONS.remove(&client_key)?;
+        }
+    // If the connection has not reached the Closed state yet, but it did advance to a new state,
+    // then record the new state.
+    } else if moved {
+        let bk = *tcp_backend;
+        let new_tcp_backend = TCPBackend {
+            backend: bk.backend,
+            backend_key: bk.backend_key,
+            state: tcp_state,
+        };
+        unsafe {
+            TCP_CONNECTIONS.insert(&client_key, &new_tcp_backend, 0_u64)?;
+        }
+    }
 
     Ok(TC_ACT_PIPE)
 }
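
The `handle_tcp_conn_close` helper imported above lives in `utils`, another of the changed files not shown in this excerpt. Judging from the call site, it advances the connection's `TCPState` based on the FIN/ACK bits of the observed segment and returns whether the state moved; a plausible sketch under that assumption (the exact transitions in the real commit may differ):

    use common::TCPState;
    use network_types::tcp::TcpHdr;

    // Advances `state` from the flags of an observed segment and returns true
    // if it changed. Sketch only; the committed helper may order transitions
    // differently or inspect additional flags.
    pub fn handle_tcp_conn_close(hdr: &TcpHdr, state: &mut TCPState) -> bool {
        let fin = hdr.fin() == 1;
        let ack = hdr.ack() == 1;
        match *state {
            // A FIN arrived: one side is starting to close its half.
            TCPState::Established if fin => {
                *state = TCPState::FinWait1;
                true
            }
            // FIN+ACK together: both halves are closing, jump to TimeWait.
            TCPState::FinWait1 if fin && ack => {
                *state = TCPState::TimeWait;
                true
            }
            // The first FIN was acknowledged; wait for the peer's FIN.
            TCPState::FinWait1 if ack => {
                *state = TCPState::FinWait2;
                true
            }
            // Final ACK observed: consider the connection closed.
            TCPState::FinWait2 if ack => {
                *state = TCPState::Closed;
                true
            }
            _ => false,
        }
    }

With this shape, an ordinary teardown walks Established → FinWait1 → FinWait2 → Closed, at which point the egress program above drops the `TCP_CONNECTIONS` entry.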
