diff --git a/probe/endpoint/connection_tracker.go b/probe/endpoint/connection_tracker.go index e65c4d3c50..8d3e3cbbf7 100644 --- a/probe/endpoint/connection_tracker.go +++ b/probe/endpoint/connection_tracker.go @@ -43,18 +43,19 @@ func newConnectionTracker(conf ReporterConfig) connectionTracker { } func flowToTuple(f conntrack.Conn) (ft fourTuple) { - ft = fourTuple{ - f.Orig.Src.String(), - f.Orig.Dst.String(), - uint16(f.Orig.SrcPort), - uint16(f.Orig.DstPort), - } - // Handle DNAT-ed connections in the initial state - if !f.Orig.Dst.Equal(f.Reply.Src) { + if (f.Status & conntrack.IPS_DST_NAT) == 0 { + ft = fourTuple{ + f.Orig.Src.String(), + f.Orig.Dst.String(), + uint16(f.Orig.SrcPort), + uint16(f.Orig.DstPort), + } + } else { + // Handle DNAT-ed connections in the initial state ft = fourTuple{ - f.Reply.Dst.String(), + f.Orig.Src.String(), f.Reply.Src.String(), - uint16(f.Reply.DstPort), + uint16(f.Orig.SrcPort), uint16(f.Reply.SrcPort), } } diff --git a/probe/endpoint/nat.go b/probe/endpoint/nat.go index 797a0d1a84..ea66cf775e 100644 --- a/probe/endpoint/nat.go +++ b/probe/endpoint/nat.go @@ -30,45 +30,102 @@ func makeNATMapper(fw flowWalker) natMapper { return natMapper{fw} } -func toMapping(f conntrack.Conn) *endpointMapping { - var mapping endpointMapping - if f.Orig.Src.Equal(f.Reply.Dst) { - mapping = endpointMapping{ - originalIP: f.Reply.Src, - originalPort: f.Reply.SrcPort, - rewrittenIP: f.Orig.Dst, - rewrittenPort: f.Orig.DstPort, - } - } else { - mapping = endpointMapping{ - originalIP: f.Orig.Src, - originalPort: f.Orig.SrcPort, - rewrittenIP: f.Reply.Dst, - rewrittenPort: f.Reply.DstPort, - } - } - - return &mapping +func endpointNodeID(scope string, ip net.IP, port uint16) string { + return report.MakeEndpointNodeID(scope, "", ip.String(), strconv.Itoa(int(port))) } -// applyNAT duplicates Nodes in the endpoint topology of a report, based on +/* + +Some examples of connections with NAT: + +Here 10.32.0.X are pod addresses; 172.31.X.X are node addresses; 10.10X.X.X are service virtual addresses. + +Pod to pod via Kubernetes service + picked up by ebpf as 10.32.0.16:47600->10.105.173.176:5432 and 10.32.0.6:5432 (??) + NAT IPS_DST_NAT orig: 10.32.0.16:47600->10.105.173.176:5432, reply: 10.32.0.6:5432->10.32.0.16:47600 + We want: 10.32.0.16:47600->10.32.0.6:5432 + - replace the destination (== NAT orig dst) with the NAT reply source (A) + +Incoming from outside the cluster to a NodePort: + picked up by ebpf as 10.32.0.1:13488->10.32.0.7:80 + NAT: IPS_SRC_NAT IPS_DST_NAT orig: 37.157.33.76:13488->172.31.2.17:30081, reply: 10.32.0.7:80->10.32.0.1:13488 + We want: 37.157.33.76:13488->10.32.0.7:80 + - replace the source (== NAT reply dst) with the NAT original source (B) + To match another probe with the other side of this connection, also want 37.157.33.76:13488->172.31.2.17:30081 + - add NAT original dst as a copy of nat reply source (C) + +Outgoing from a pod: + picked up by ebpf as 10.32.0.7:36078->18.221.99.178:443 + NAT: IPS_SRC_NAT orig: 10.32.0.7:36078->18.221.99.178:443, reply: 18.221.99.178:443->172.31.2.17:36078 + We want: 10.32.0.7:36078->18.221.99.178:443 + - leave it alone. (D) + +Docker container exposing port to similar on different host +host1: + picked up by ebpf as ip-172-31-5-80;172.17.0.2:43042->172.31.2.17:8080 + NAT: IPS_SRC_NAT orig: 172.17.0.2:43042->172.31.2.17:8080, reply: 172.31.2.17:8080-> 172.31.5.80:43042 + We want: 172.31.5.80:43042->172.31.2.17:8080 + - can't have a blanket rule to replace NAT original source with NAT reply destination, because that breaks case D. + we could add 172.31.5.80:43042 (nat reply destination) as a copy of ip-172-31-5-80;172.17.0.2:43042 (nat orig source) (E) +host2: + picked up by ebpf as 172.31.5.80:43042->ip-172-31-2-17;172.17.0.2:80 + NAT: IPS_DST_NAT orig: 172.31.5.80:43042->172.31.2.17:8080, reply: 172.17.0.2:80->172.31.5.80:43042 + Rule A doesn't match and rule B is a no-op because the addresses are the same. + To match another probe with the other side of this connection, also want 172.31.5.80:43042->172.31.2.17:8080 + - add NAT original dst as a copy of nat reply source (C) + +All of the above can be satisfied by these rules: + For SRC_NAT + replace the source (== NAT reply dst) with the NAT original source (B) + or add NAT reply destination as a copy of NAT original source (E) + For DST_NAT + replace NAT original destination in adjacencies with the NAT reply source (A) + or add NAT original destination as a copy of NAT reply source (C) +*/ + +// applyNAT modifies Nodes in the endpoint topology of a report, based on // the NAT table. func (n natMapper) applyNAT(rpt report.Report, scope string) { n.flowWalker.walkFlows(func(f conntrack.Conn, _ bool) { - mapping := toMapping(f) - realEndpointPort := strconv.Itoa(int(mapping.originalPort)) - copyEndpointPort := strconv.Itoa(int(mapping.rewrittenPort)) - realEndpointID := report.MakeEndpointNodeID(scope, "", mapping.originalIP.String(), realEndpointPort) - copyEndpointID := report.MakeEndpointNodeID(scope, "", mapping.rewrittenIP.String(), copyEndpointPort) - - node, ok := rpt.Endpoint.Nodes[realEndpointID] - if !ok { - return + if (f.Status & conntrack.IPS_SRC_NAT) != 0 { + origSrcID := endpointNodeID(scope, f.Orig.Src, f.Orig.SrcPort) + replyDstID := endpointNodeID(scope, f.Reply.Dst, f.Reply.DstPort) + if replyDstID != origSrcID { + if fromNode, ok := rpt.Endpoint.Nodes[replyDstID]; ok { + // replace the source (== NAT reply dst) with the NAT original source (B) + delete(rpt.Endpoint.Nodes, replyDstID) + rpt.Endpoint.AddNode(fromNode.WithID(origSrcID)) + } else if origSrcNode, ok := rpt.Endpoint.Nodes[origSrcID]; ok { + // add NAT reply destination as a copy of NAT original source (E) + newNode := origSrcNode.WithID(replyDstID).WithLatests(map[string]string{ + CopyOf: origSrcID, + }) + rpt.Endpoint.AddNode(newNode) + } + } } - rpt.Endpoint.AddNode(node.WithID(copyEndpointID).WithLatests(map[string]string{ - CopyOf: realEndpointID, - })) + if (f.Status & conntrack.IPS_DST_NAT) != 0 { + replySrcID := endpointNodeID(scope, f.Reply.Src, f.Reply.SrcPort) + origDstID := endpointNodeID(scope, f.Orig.Dst, f.Orig.DstPort) + if replySrcID != origDstID { + fromID := endpointNodeID(scope, f.Reply.Dst, f.Reply.DstPort) + fromNode, ok := rpt.Endpoint.Nodes[fromID] + if ok && fromNode.Adjacency.Contains(origDstID) { + // replace NAT original destination in adjacencies with the NAT reply source (A) + fromNode.Adjacency = fromNode.Adjacency.Minus(origDstID) + fromNode = fromNode.WithAdjacent(replySrcID) + rpt.Endpoint.Nodes[fromID] = fromNode + } else if replySrcNode, ok := rpt.Endpoint.Nodes[replySrcID]; ok { + // add NAT original destination as a copy of NAT reply source (C) + newNode := replySrcNode.WithID(origDstID).WithLatests(map[string]string{ + CopyOf: replySrcID, + }) + rpt.Endpoint.AddNode(newNode) + } + } + + } }) } diff --git a/probe/endpoint/nat_internal_test.go b/probe/endpoint/nat_internal_test.go index d4f3c4354b..a94d55218f 100644 --- a/probe/endpoint/nat_internal_test.go +++ b/probe/endpoint/nat_internal_test.go @@ -47,10 +47,11 @@ func TestNat(t *testing.T) { { f := conntrack.Conn{ MsgType: conntrack.NfctMsgUpdate, + Status: conntrack.IPS_DST_NAT, Orig: conntrack.Tuple{ Src: host2, Dst: host1, - SrcPort: 22222, + SrcPort: 22223, DstPort: 80, Proto: syscall.IPPROTO_TCP, }, @@ -58,7 +59,7 @@ func TestNat(t *testing.T) { Src: c1, Dst: host2, SrcPort: 80, - DstPort: 22222, + DstPort: 22223, Proto: syscall.IPPROTO_TCP, }, CtId: 1, @@ -73,8 +74,11 @@ func TestNat(t *testing.T) { have.Endpoint.AddNode(report.MakeNodeWith(originalID, map[string]string{ "foo": "bar", })) + fromID := report.MakeEndpointNodeID("host1", "", "2.3.4.5", "22223") + have.Endpoint.AddNode(report.MakeNodeWith(fromID, nil).WithAdjacent(originalID)) want := have.Copy() + // add nat original destination as a copy of nat reply source wantID := report.MakeEndpointNodeID("host1", "", "1.2.3.4", "80") want.Endpoint.AddNode(report.MakeNodeWith(wantID, map[string]string{ CopyOf: originalID, @@ -87,10 +91,11 @@ func TestNat(t *testing.T) { } } - // form the PoV of host2 + // from the PoV of host2 { f := conntrack.Conn{ MsgType: conntrack.NfctMsgUpdate, + Status: conntrack.IPS_SRC_NAT, Orig: conntrack.Tuple{ Src: c2, Dst: host1, @@ -113,17 +118,20 @@ func TestNat(t *testing.T) { have := report.MakeReport() originalID := report.MakeEndpointNodeID("host2", "", "10.0.47.2", "22222") + toID := report.MakeEndpointNodeID("host2", "", "1.2.3.4", "80") + have.Endpoint.AddNode(report.MakeNodeWith(toID, nil)) have.Endpoint.AddNode(report.MakeNodeWith(originalID, map[string]string{ "foo": "baz", - })) + }).WithAdjacent(toID)) + // add NAT reply destination as a copy of NAT original source want := have.Copy() want.Endpoint.AddNode(report.MakeNodeWith(report.MakeEndpointNodeID("host2", "", "2.3.4.5", "22223"), map[string]string{ CopyOf: originalID, "foo": "baz", - })) + }).WithAdjacent(toID)) - makeNATMapper(ct).applyNAT(have, "host1") + makeNATMapper(ct).applyNAT(have, "host2") if !reflect.DeepEqual(want, have) { t.Fatal(test.Diff(want, have)) } diff --git a/prog/probe.go b/prog/probe.go index 3e14d2f376..f495bb145c 100644 --- a/prog/probe.go +++ b/prog/probe.go @@ -266,9 +266,7 @@ func probeMain(flags probeFlags, targets []appclient.Target) { } if flags.dockerEnabled { - // Don't add the bridge in Kubernetes since container IPs are global and - // shouldn't be scoped - if flags.dockerBridge != "" && !flags.kubernetesEnabled { + if flags.dockerBridge != "" { if err := report.AddLocalBridge(flags.dockerBridge); err != nil { log.Errorf("Docker: problem with bridge %s: %v", flags.dockerBridge, err) } diff --git a/report/id_list.go b/report/id_list.go index e74901e7a4..5fa9590f75 100644 --- a/report/id_list.go +++ b/report/id_list.go @@ -36,3 +36,8 @@ func (a IDList) Contains(id string) bool { func (a IDList) Intersection(b IDList) IDList { return IDList(StringSet(a).Intersection(StringSet(b))) } + +// Minus returns the set with id removed +func (a IDList) Minus(id string) IDList { + return IDList(StringSet(a).Minus(id)) +} diff --git a/report/string_set.go b/report/string_set.go index 7580d140ca..e4a6498c13 100644 --- a/report/string_set.go +++ b/report/string_set.go @@ -50,6 +50,15 @@ func (s StringSet) Intersection(b StringSet) StringSet { return result } +// Minus returns the set with str removed +func (s StringSet) Minus(str string) StringSet { + i := sort.Search(len(s), func(i int) bool { return s[i] >= str }) + if i < len(s) && s[i] == str { + return append(s[:i], s[i+1:]...) + } + return s +} + // Equal returns true if a and b have the same contents func (s StringSet) Equal(b StringSet) bool { if len(s) != len(b) {