Skip to content

Commit a62d3c8

Browse files
authored
[fix] make VLAN IPs unique across controller pod restarts (#544)
With the VLAN clusters, a in-memory map was maintained for each cluster's IPs. When the controller pod restarts, that IP map is lost, and any further cluster scaling activities cause the nodes to get overlapping IPs.
1 parent 2c91eb4 commit a62d3c8

File tree

3 files changed

+88
-15
lines changed

3 files changed

+88
-15
lines changed

controller/linodemachine_controller_helpers.go

+11-5
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,10 @@ func newCreateConfig(ctx context.Context, machineScope *scope.MachineScope, logg
132132

133133
// if vlan is enabled, attach additional interface as eth0 to linode
134134
if machineScope.LinodeCluster.Spec.Network.UseVlan {
135-
iface := getVlanInterfaceConfig(machineScope, logger)
135+
iface, err := getVlanInterfaceConfig(ctx, machineScope, logger)
136+
if err != nil {
137+
return nil, err
138+
}
136139
if iface != nil {
137140
// add VLAN interface as first interface
138141
createConfig.Interfaces = slices.Insert(createConfig.Interfaces, 0, *iface)
@@ -358,18 +361,21 @@ func getFirewallID(ctx context.Context, machineScope *scope.MachineScope, logger
358361
return *linodeFirewall.Spec.FirewallID, nil
359362
}
360363

361-
func getVlanInterfaceConfig(machineScope *scope.MachineScope, logger logr.Logger) *linodego.InstanceConfigInterfaceCreateOptions {
364+
func getVlanInterfaceConfig(ctx context.Context, machineScope *scope.MachineScope, logger logr.Logger) (*linodego.InstanceConfigInterfaceCreateOptions, error) {
362365
logger = logger.WithValues("vlanName", machineScope.Cluster.Name)
363366

364367
// Try to obtain a IP for the machine using its name
365-
ip := util.GetNextVlanIP(machineScope.Cluster.Name, machineScope.Cluster.Namespace)
366-
logger.Info("obtained IP for machine", "name", machineScope.LinodeMachine.Name, "ip", ip)
368+
ip, err := util.GetNextVlanIP(ctx, machineScope.Cluster.Name, machineScope.Cluster.Namespace, machineScope.Client)
369+
if err != nil {
370+
return nil, fmt.Errorf("getting vlanIP: %w", err)
371+
}
367372

373+
logger.Info("obtained IP for machine", "name", machineScope.LinodeMachine.Name, "ip", ip)
368374
return &linodego.InstanceConfigInterfaceCreateOptions{
369375
Purpose: linodego.InterfacePurposeVLAN,
370376
Label: machineScope.Cluster.Name,
371377
IPAMAddress: fmt.Sprintf(vlanIPFormat, ip),
372-
}
378+
}, nil
373379
}
374380

375381
func getVPCInterfaceConfig(ctx context.Context, machineScope *scope.MachineScope, interfaces []linodego.InstanceConfigInterfaceCreateOptions, logger logr.Logger) (*linodego.InstanceConfigInterfaceCreateOptions, error) {

util/vlanips.go

+56-9
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,19 @@ limitations under the License.
1717
package util
1818

1919
import (
20+
"context"
2021
"fmt"
22+
"net"
2123
"net/netip"
2224
"slices"
2325
"sync"
26+
27+
"k8s.io/apimachinery/pkg/labels"
28+
"k8s.io/apimachinery/pkg/selection"
29+
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
30+
"sigs.k8s.io/controller-runtime/pkg/client"
31+
32+
"github.com/linode/cluster-api-provider-linode/api/v1alpha2"
2433
)
2534

2635
var (
@@ -35,16 +44,52 @@ type ClusterIPs struct {
3544
ips []string
3645
}
3746

38-
func getClusterIPs(key string) *ClusterIPs {
47+
func getExistingIPsForCluster(ctx context.Context, clusterName, namespace string, kubeclient client.Client) ([]string, error) {
48+
clusterReq, err := labels.NewRequirement("cluster.x-k8s.io/cluster-name", selection.Equals, []string{clusterName})
49+
if err != nil {
50+
return nil, fmt.Errorf("building label selector: %w", err)
51+
}
52+
53+
selector := labels.NewSelector()
54+
selector = selector.Add(*clusterReq)
55+
var linodeMachineList v1alpha2.LinodeMachineList
56+
err = kubeclient.List(ctx, &linodeMachineList, &client.ListOptions{Namespace: namespace, LabelSelector: selector})
57+
if err != nil {
58+
return nil, fmt.Errorf("listing all linodeMachines %w", err)
59+
}
60+
61+
_, ipnet, err := net.ParseCIDR(vlanIPRange)
62+
if err != nil {
63+
return nil, fmt.Errorf("parsing vlanIPRange: %w", err)
64+
}
65+
66+
existingIPs := []string{}
67+
for _, lm := range linodeMachineList.Items {
68+
for _, addr := range lm.Status.Addresses {
69+
if addr.Type == clusterv1.MachineInternalIP && ipnet.Contains(net.ParseIP(addr.Address)) {
70+
existingIPs = append(existingIPs, addr.Address)
71+
}
72+
}
73+
}
74+
return existingIPs, nil
75+
}
76+
77+
func getClusterIPs(ctx context.Context, clusterName, namespace string, kubeclient client.Client) (*ClusterIPs, error) {
78+
key := fmt.Sprintf("%s.%s", namespace, clusterName)
3979
vlanIPsMu.Lock()
4080
defer vlanIPsMu.Unlock()
41-
ips, exists := vlanIPsMap[key]
81+
clusterIps, exists := vlanIPsMap[key]
4282
if !exists {
43-
ips = &ClusterIPs{
44-
ips: []string{},
83+
ips, err := getExistingIPsForCluster(ctx, clusterName, namespace, kubeclient)
84+
if err != nil {
85+
return nil, fmt.Errorf("getting existingIPs for a cluster: %w", err)
86+
}
87+
clusterIps = &ClusterIPs{
88+
ips: ips,
4589
}
90+
vlanIPsMap[key] = clusterIps
4691
}
47-
return ips
92+
return clusterIps, nil
4893
}
4994

5095
func (c *ClusterIPs) getNextIP() string {
@@ -66,10 +111,12 @@ func (c *ClusterIPs) getNextIP() string {
66111
}
67112

68113
// GetNextVlanIP returns the next available IP for a cluster
69-
func GetNextVlanIP(clusterName, namespace string) string {
70-
key := fmt.Sprintf("%s.%s", namespace, clusterName)
71-
clusterIPs := getClusterIPs(key)
72-
return clusterIPs.getNextIP()
114+
func GetNextVlanIP(ctx context.Context, clusterName, namespace string, kubeclient client.Client) (string, error) {
115+
clusterIPs, err := getClusterIPs(ctx, clusterName, namespace, kubeclient)
116+
if err != nil {
117+
return "", err
118+
}
119+
return clusterIPs.getNextIP(), nil
73120
}
74121

75122
func DeleteClusterIPs(clusterName, namespace string) {

util/vlanips_test.go

+21-1
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,13 @@ limitations under the License.
1717
package util
1818

1919
import (
20+
"context"
2021
"reflect"
2122
"testing"
23+
24+
"go.uber.org/mock/gomock"
25+
26+
"github.com/linode/cluster-api-provider-linode/mock"
2227
)
2328

2429
func TestGetNextVlanIP(t *testing.T) {
@@ -28,27 +33,42 @@ func TestGetNextVlanIP(t *testing.T) {
2833
clusterName string
2934
clusterNamespace string
3035
want string
36+
expects func(mock *mock.MockK8sClient)
3137
}{
3238
{
3339
name: "provide key which exists in map",
3440
clusterName: "test",
3541
clusterNamespace: "testna",
3642
want: "10.0.0.3",
43+
expects: func(mock *mock.MockK8sClient) {
44+
},
3745
},
3846
{
3947
name: "provide key which doesn't exist",
4048
clusterName: "test",
4149
clusterNamespace: "testnonexistent",
4250
want: "10.0.0.1",
51+
expects: func(mock *mock.MockK8sClient) {
52+
mock.EXPECT().List(gomock.Any(), gomock.Any(), gomock.Any()).MinTimes(1)
53+
},
4354
},
4455
}
56+
ctrl := gomock.NewController(t)
57+
defer ctrl.Finish()
58+
mockK8sClient := mock.NewMockK8sClient(ctrl)
59+
4560
for _, tt := range tests {
4661
vlanIPsMap["testna.test"] = &ClusterIPs{
4762
ips: []string{"10.0.0.1", "10.0.0.2"},
4863
}
4964
t.Run(tt.name, func(t *testing.T) {
5065
t.Parallel()
51-
if got := GetNextVlanIP(tt.clusterName, tt.clusterNamespace); !reflect.DeepEqual(got, tt.want) {
66+
tt.expects(mockK8sClient)
67+
got, err := GetNextVlanIP(context.Background(), tt.clusterName, tt.clusterNamespace, mockK8sClient)
68+
if err != nil {
69+
t.Error("error")
70+
}
71+
if !reflect.DeepEqual(got, tt.want) {
5272
t.Errorf("GetNextVlanIP() = %v, want %v", got, tt.want)
5373
}
5474
})

0 commit comments

Comments
 (0)