Skip to content

Commit 5f1143e

Browse files
committed
Add lpf IPoIB support
1 parent 376abf9 commit 5f1143e

File tree

4 files changed

+249
-14
lines changed

4 files changed

+249
-14
lines changed

src/lib/dhcp/hwaddr.h

+3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ struct HWAddr {
2323
/// @brief Size of an ethernet hardware address.
2424
static const size_t ETHERNET_HWADDR_LEN = 6;
2525

26+
/// @brief Size of an infiniband hardware address.
27+
static const size_t INFINIBAND_HWADDR_LEN = 20;
28+
2629
/// @brief Maximum size of a hardware address.
2730
static const size_t MAX_HWADDR_LEN = 20;
2831

src/lib/dhcp/pkt_filter_lpf.cc

+137-14
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,98 @@ struct sock_filter dhcp_sock_filter [] = {
121121
BPF_STMT(BPF_RET + BPF_K, 0),
122122
};
123123

124+
/// The following structure defines a Berkeley Packet Filter program to perform
125+
/// packet filtering. The program operates on IPoIB pseudo packets. To help with
126+
/// interpretation of the program, for the types of packets we are interested
127+
/// in, the header layout is:
128+
///
129+
/// 20 bytes Source Interface Address
130+
/// 2 bytes Packet Type
131+
/// 2 bytes Reserved/Unused
132+
///
133+
/// The rest is identical to the Ethernet-based packets described above.
134+
///
135+
/// Each instruction is preceded with the comments giving the instruction
136+
/// number within a BPF program, in the following format: #123.
137+
138+
struct sock_filter dhcp_sock_filter_ib [] = {
139+
// Make sure this is an IP packet: check the half-word (two bytes)
140+
// at offset 20 in the packet (the IPoIB pseudo packet type). If it
141+
// is, advance to the next instruction. If not, advance 11
142+
// instructions (which takes execution to the last instruction in
143+
// the sequence: "drop it").
144+
// #0
145+
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, IPOIB_PACKET_TYPE_OFFSET),
146+
// #1
147+
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 11),
148+
149+
// Make sure it's a UDP packet. The IP protocol is at offset
150+
// 9 in the IP header so, adding the IPoIB packet header size
151+
// of 24 bytes gives an absolute byte offset in the packet of 33.
152+
// #2
153+
BPF_STMT(BPF_LD + BPF_B + BPF_ABS,
154+
IPOIB_HEADER_LEN + IP_PROTO_TYPE_OFFSET),
155+
// On mismatch, skip 9 instructions, i.e. go to #13 ("drop it").
// #3
156+
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 9),
157+
158+
// Make sure this isn't a fragment by checking that the fragment
159+
// offset field in the IP header is zero. This field is the
160+
// least-significant 13 bits in the bytes at offsets 6 and 7 in
161+
// the IP header, so the half-word at offset 30 (6 + size of
162+
// IPoIB header) is loaded and an appropriate mask applied.
163+
// #4
164+
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, IPOIB_HEADER_LEN + IP_FLAGS_OFFSET),
165+
// If any of the masked bits is set, skip 7 instructions, i.e. go
// to #13 ("drop it"); otherwise continue with #6.
// #5
166+
BPF_JUMP(BPF_JMP + BPF_JSET + BPF_K, 0x1fff, 7, 0),
167+
168+
// Check the packet's destination address. The program will only
169+
// allow the packets sent to the broadcast address or unicast
170+
// to the specific address on the interface. By default, this
171+
// address is set to 0 and must be set to the specific value
172+
// when the raw socket is created and the program is attached
173+
// to it. The caller must assign the address to the
174+
// prog.bf_insns[8].k in the network byte order.
// NOTE(review): for this array the value is written through
// dhcp_sock_filter_ib[8].k, so instruction #8 must stay at index 8.
175+
// #6
176+
BPF_STMT(BPF_LD + BPF_W + BPF_ABS,
177+
IPOIB_HEADER_LEN + IP_DEST_ADDR_OFFSET),
178+
// If this is a broadcast address, skip the next check.
179+
// #7
180+
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0xffffffff, 1, 0),
181+
// If this is not broadcast address, compare it with the unicast
182+
// address specified for the interface.
// On mismatch, skip 4 instructions, i.e. go to #13 ("drop it").
183+
// #8
184+
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x00000000, 0, 4),
185+
186+
// Get the IP header length. This is achieved by the following
187+
// (special) instruction that, given the offset of the start
188+
// of the IP header (offset 24) loads the IP header length.
189+
// #9
190+
BPF_STMT(BPF_LDX + BPF_B + BPF_MSH, IPOIB_HEADER_LEN),
191+
192+
// Make sure it's to the right port. The following instruction
193+
// adds the previously extracted IP header length to the given
194+
// offset to locate the correct byte. The given offset of 26
195+
// comprises the length of the IPoIB header (24) plus the offset
196+
// of the UDP destination port (2) within the UDP header.
197+
// #10
198+
BPF_STMT(BPF_LD + BPF_H + BPF_IND, IPOIB_HEADER_LEN + UDP_DEST_PORT),
199+
// The following instruction tests against the default DHCP server port,
200+
// but the actual port is set in PktFilterLPF::openSocket().
201+
// N.B. The code in that method assumes that this instruction is at
202+
// offset 11 in the program. If this is changed, openSocket() must be
203+
// updated.
// On mismatch, skip 1 instruction, i.e. go to #13 ("drop it").
204+
// #11
205+
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DHCP4_SERVER_PORT, 0, 1),
206+
207+
// If we passed all the tests, ask for the whole packet.
208+
// #12
209+
BPF_STMT(BPF_RET + BPF_K, (u_int)-1),
210+
211+
// Otherwise, drop it.
212+
// #13
213+
BPF_STMT(BPF_RET + BPF_K, 0),
214+
};
215+
124216
}
125217

126218
using namespace isc::util;
@@ -169,16 +261,30 @@ PktFilterLPF::openSocket(Iface& iface,
169261
struct sock_fprog filter_program;
170262
memset(&filter_program, 0, sizeof(filter_program));
171263

172-
filter_program.filter = dhcp_sock_filter;
173-
filter_program.len = sizeof(dhcp_sock_filter) / sizeof(struct sock_filter);
264+
if (iface.getHWType() == HTYPE_INFINIBAND) {
265+
filter_program.filter = dhcp_sock_filter_ib;
266+
filter_program.len = sizeof(dhcp_sock_filter_ib) / sizeof(struct sock_filter);
267+
268+
// Configure the filter program to receive unicast packets sent to the
269+
// specified address. The program will also allow packets sent to the
270+
// 255.255.255.255 broadcast address.
271+
dhcp_sock_filter_ib[8].k = addr.toUint32();
174272

175-
// Configure the filter program to receive unicast packets sent to the
176-
// specified address. The program will also allow packets sent to the
177-
// 255.255.255.255 broadcast address.
178-
dhcp_sock_filter[8].k = addr.toUint32();
273+
// Override the default port value.
274+
dhcp_sock_filter_ib[11].k = port;
275+
} else {
276+
filter_program.filter = dhcp_sock_filter;
277+
filter_program.len = sizeof(dhcp_sock_filter) / sizeof(struct sock_filter);
278+
279+
// Configure the filter program to receive unicast packets sent to the
280+
// specified address. The program will also allow packets sent to the
281+
// 255.255.255.255 broadcast address.
282+
dhcp_sock_filter[8].k = addr.toUint32();
283+
284+
// Override the default port value.
285+
dhcp_sock_filter[11].k = port;
286+
}
179287

180-
// Override the default port value.
181-
dhcp_sock_filter[11].k = port;
182288
// Apply the filter.
183289
if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &filter_program,
184290
sizeof(filter_program)) < 0) {
@@ -315,7 +421,21 @@ PktFilterLPF::receive(Iface& iface, const SocketInfo& socket_info) {
315421
Pkt4Ptr dummy_pkt = Pkt4Ptr(new Pkt4(DHCPDISCOVER, 0));
316422

317423
// Decode ethernet, ip and udp headers.
318-
decodeEthernetHeader(buf, dummy_pkt);
424+
if (iface.getHWType() == HTYPE_INFINIBAND) {
425+
decodeIPoIBHeader(buf, dummy_pkt);
426+
427+
// The IPoIB header does not contain the local address.
428+
// Set it from the interface instead.
429+
if (iface.getMacLen() != HWAddr::INFINIBAND_HWADDR_LEN) {
430+
isc_throw(SocketReadError,
431+
"Invalid local hardware address size for IPoIB interface.");
432+
}
433+
HWAddrPtr hwaddr(new HWAddr(iface.getMac(), iface.getMacLen(),
434+
iface.getHWType()));
435+
dummy_pkt->setLocalHWAddr(hwaddr);
436+
} else {
437+
decodeEthernetHeader(buf, dummy_pkt);
438+
}
319439
decodeIpUdpHeader(buf, dummy_pkt);
320440

321441
// Read the DHCP data.
@@ -374,11 +494,14 @@ PktFilterLPF::send(const Iface& iface, uint16_t sockfd, const Pkt4Ptr& pkt) {
374494
pkt->setLocalHWAddr(hwaddr);
375495
}
376496

377-
378-
// Ethernet frame header.
379-
// Note that we don't validate whether HW addresses in 'pkt'
380-
// are valid because they are checked by the function called.
381-
writeEthernetHeader(pkt, buf);
497+
if (iface.getHWType() == HTYPE_INFINIBAND) {
498+
writeIPoIBHeader(iface, pkt, buf);
499+
} else {
500+
// Ethernet frame header.
501+
// Note that we don't validate whether HW addresses in 'pkt'
502+
// are valid because they are checked by the function called.
503+
writeEthernetHeader(pkt, buf);
504+
}
382505

383506
// IP and UDP header
384507
writeIpUdpHeader(pkt, buf);

src/lib/dhcp/protocol_util.cc

+72
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,39 @@ decodeEthernetHeader(InputBuffer& buf, Pkt4Ptr& pkt) {
5858
buf.setPosition(start_pos + ETHERNET_HEADER_LEN);
5959
}
6060

61+
void
62+
decodeIPoIBHeader(InputBuffer& buf, Pkt4Ptr& pkt) {
63+
// The size of the buffer to be parsed must not be lower
64+
// then the size of the IPoIB frame header.
65+
if (buf.getLength() - buf.getPosition() < IPOIB_HEADER_LEN) {
66+
isc_throw(InvalidPacketHeader, "size of ethernet header in received "
67+
<< "packet is invalid, expected at least "
68+
<< IPOIB_HEADER_LEN << " bytes, received "
69+
<< buf.getLength() - buf.getPosition() << " bytes");
70+
}
71+
// Packet object must not be NULL. We want to output some values
72+
// to this object.
73+
if (!pkt) {
74+
isc_throw(BadValue, "NULL packet object provided when parsing ethernet"
75+
" frame header");
76+
}
77+
78+
// The size of the single address is always lower then the size of
79+
// the header that holds this address. Otherwise, it is a programming
80+
// error that we want to detect in the compilation time.
81+
BOOST_STATIC_ASSERT(IPOIB_HEADER_LEN > HWAddr::INFINIBAND_HWADDR_LEN);
82+
83+
// Remember initial position.
84+
size_t start_pos = buf.getPosition();
85+
86+
// Read the source HW address.
87+
std::vector<uint8_t> src_addr;
88+
buf.readVector(src_addr, HWAddr::INFINIBAND_HWADDR_LEN);
89+
pkt->setRemoteHWAddr(HWTYPE_INFINIBAND, HWAddr::INFINIBAND_HWADDR_LEN, src_addr);
90+
// Move the buffer read pointer to the end of the Ethernet frame header.
91+
buf.setPosition(start_pos + IPOIB_HEADER_LEN);
92+
}
93+
6194
void
6295
decodeIpUdpHeader(InputBuffer& buf, Pkt4Ptr& pkt) {
6396
// The size of the buffer must be at least equal to the minimal size of
@@ -161,6 +194,45 @@ writeEthernetHeader(const Pkt4Ptr& pkt, OutputBuffer& out_buf) {
161194
out_buf.writeUint16(ETHERNET_TYPE_IP);
162195
}
163196

197+
void
198+
writeIPoIBHeader(const Iface& iface, const Pkt4Ptr& pkt, OutputBuffer& out_buf) {
199+
// Set destination HW address.
200+
HWAddrPtr remote_addr = pkt->getRemoteHWAddr();
201+
if (remote_addr) {
202+
if (remote_addr->hwaddr_.size() != HWAddr::INFINIBAND_HWADDR_LEN) {
203+
isc_throw(BadValue, "invalid size of the remote HW address "
204+
<< remote_addr->hwaddr_.size() << " when constructing"
205+
<< " an ethernet frame header; expected size is"
206+
<< " " << HWAddr::INFINIBAND_HWADDR_LEN);
207+
} else if (!pkt->isRelayed() &&
208+
(pkt->getFlags() & Pkt4::FLAG_BROADCAST_MASK)) {
209+
if (iface.getBcastMacLen() != HWAddr::INFINIBAND_HWADDR_LEN) {
210+
isc_throw(BadValue, "invalid size of the bcast HW address "
211+
<< iface.getBcastMacLen() << " when constructing"
212+
<< " an ethernet frame header; expected size is"
213+
<< " " << HWAddr::INFINIBAND_HWADDR_LEN);
214+
}
215+
out_buf.writeData(iface.getBcastMac(),
216+
HWAddr::INFINIBAND_HWADDR_LEN);
217+
} else {
218+
out_buf.writeData(&remote_addr->hwaddr_[0],
219+
HWAddr::INFINIBAND_HWADDR_LEN);
220+
}
221+
} else {
222+
// HW address has not been specified. This is possible when receiving
223+
// packet through a logical interface (e.g. lo). In such cases, we
224+
// don't want to fail but rather provide a default HW address, which
225+
// consists of zeros.
226+
out_buf.writeData(&std::vector<uint8_t>(HWAddr::INFINIBAND_HWADDR_LEN)[0],
227+
HWAddr::INFINIBAND_HWADDR_LEN);
228+
}
229+
230+
// Type IP.
231+
out_buf.writeUint16(ETHERNET_TYPE_IP);
232+
// Reserved
233+
out_buf.writeUint16(0);
234+
}
235+
164236
void
165237
writeIpUdpHeader(const Pkt4Ptr& pkt, util::OutputBuffer& out_buf) {
166238

src/lib/dhcp/protocol_util.h

+37
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#define PROTOCOL_UTIL_H
99

1010
#include <dhcp/pkt4.h>
11+
#include <dhcp/iface_mgr.h>
1112
#include <util/buffer.h>
1213

1314
#include <stdint.h>
@@ -39,6 +40,12 @@ static const size_t ETHERNET_PACKET_TYPE_OFFSET = 12;
3940
/// and locations on different OSes.
4041
static const uint16_t ETHERNET_TYPE_IP = 0x0800;
4142

43+
/// Size of the IPoIB pseudo frame header.
44+
static const size_t IPOIB_HEADER_LEN = 24;
45+
/// Offset of the 2-byte word in the IPoIB pseudo packet which
46+
/// holds the type of the protocol it encapsulates.
47+
static const size_t IPOIB_PACKET_TYPE_OFFSET = 20;
48+
4249
/// Minimal IPv4 header length.
4350
static const size_t MIN_IP_HEADER_LEN = 20;
4451
/// Offset in the IP header where the flags field starts.
@@ -75,6 +82,25 @@ static const size_t UDP_DEST_PORT = 2;
7582
/// @throw BadValue if pkt object is NULL.
7683
void decodeEthernetHeader(util::InputBuffer& buf, Pkt4Ptr& pkt);
7784

85+
/// @brief Decode the IPoIB pseudo header.
86+
///
87+
/// This function reads the IPoIB pseudo frame header from the provided
88+
/// buffer at the current read position. The source HW address
89+
/// is read from the header and assigned as client address in
90+
/// the pkt object. The buffer read pointer is set to the end
91+
/// of the IPoIB frame header if read was successful.
92+
///
93+
/// @note This function checks the provided 'pkt' pointer: a NULL
94+
/// pointer is rejected with a BadValue exception rather than
95+
/// being dereferenced.
96+
///
97+
/// @param buf input buffer holding header to be parsed.
98+
/// @param [out] pkt packet object receiving HW source address read from header.
99+
///
100+
/// @throw InvalidPacketHeader if packet header is truncated
101+
/// @throw BadValue if pkt object is NULL.
102+
void decodeIPoIBHeader(util::InputBuffer& buf, Pkt4Ptr& pkt);
103+
78104
/// @brief Decode IP and UDP header.
79105
///
80106
/// This function reads IP and UDP headers from the provided buffer
@@ -105,6 +131,17 @@ void decodeIpUdpHeader(util::InputBuffer& buf, Pkt4Ptr& pkt);
105131
void writeEthernetHeader(const Pkt4Ptr& pkt,
106132
util::OutputBuffer& out_buf);
107133

134+
/// @brief Writes IPoIB pseudo frame header into a buffer.
135+
///
136+
/// @warning This function does not check that the provided 'pkt'
137+
/// pointer is valid. Caller must make sure that pointer is
138+
/// allocated.
139+
///
140+
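/// @param iface interface whose broadcast HW address is written as the
/// destination when the packet is not relayed and has the broadcast flag set.
/// @param pkt packet object holding source and destination HW address.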
141+
/// @param [out] out_buf buffer where a header is written.
142+
void writeIPoIBHeader(const Iface& iface, const Pkt4Ptr& pkt,
143+
util::OutputBuffer& out_buf);
144+
108145
/// @brief Writes both IP and UDP header into output buffer
109146
///
110147
/// This utility function assembles IP and UDP packet headers for the

0 commit comments

Comments
 (0)