Skip to content

Commit da70aaf

Browse files
author
Akshay Kumar
committed
SKE node, node_info, node_lifecycle controller changes
1 parent fec481a commit da70aaf

22 files changed

+789
-143
lines changed

hack/existing-cluster-dev0-env-template.sh

+4
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,7 @@ export SCOPE="BOTH"
5656
# FSS volume handle
5757
# format is FileSystemOCID:serverIP:path
5858
export FSS_VOLUME_HANDLE="ocid1.filesystem.oc1.iad.aaaaaaaaaaa5wj2infqwillqojxwiotjmfsc2ylefuzqaaaa:10.0.10.104:/FileSystem-20210820-0454-50"
59+
60+
# For SKE node, node_info, node_lifecycle controller tests against PDE
61+
# To setup PDE and point your localhost:25000 to the PDE CP API refer: Refer: https://bitbucket.oci.oraclecorp.com/projects/OKE/repos/oke-control-plane/browse/personal-environments/README.md
62+
# export CE_ENDPOINT_OVERRIDE="http://localhost:25000"

pkg/cloudprovider/providers/oci/ccm.go

+16-8
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import (
4545
"github.com/oracle/oci-cloud-controller-manager/pkg/oci/client"
4646
"github.com/oracle/oci-cloud-controller-manager/pkg/oci/instance/metadata"
4747
"github.com/oracle/oci-go-sdk/v65/common"
48+
"github.com/oracle/oci-go-sdk/v65/containerengine"
4849
"github.com/oracle/oci-go-sdk/v65/core"
4950
)
5051

@@ -79,9 +80,10 @@ type CloudProvider struct {
7980
securityListManagerFactory securityListManagerFactory
8081
config *providercfg.Config
8182

82-
logger *zap.SugaredLogger
83-
instanceCache cache.Store
84-
metricPusher *metrics.MetricPusher
83+
logger *zap.SugaredLogger
84+
instanceCache cache.Store
85+
virtualNodeCache cache.Store
86+
metricPusher *metrics.MetricPusher
8587
}
8688

8789
func (cp *CloudProvider) InstancesV2() (cloudprovider.InstancesV2, bool) {
@@ -144,11 +146,12 @@ func NewCloudProvider(config *providercfg.Config) (cloudprovider.Interface, erro
144146
}
145147

146148
return &CloudProvider{
147-
client: c,
148-
config: config,
149-
logger: logger.Sugar(),
150-
instanceCache: cache.NewTTLStore(instanceCacheKeyFn, time.Duration(24)*time.Hour),
151-
metricPusher: metricPusher,
149+
client: c,
150+
config: config,
151+
logger: logger.Sugar(),
152+
instanceCache: cache.NewTTLStore(instanceCacheKeyFn, time.Duration(24)*time.Hour),
153+
virtualNodeCache: cache.NewTTLStore(virtualNodeCacheKeyFn, time.Duration(24)*time.Hour),
154+
metricPusher: metricPusher,
152155
}, nil
153156
}
154157

@@ -184,6 +187,7 @@ func (cp *CloudProvider) Initialize(clientBuilder cloudprovider.ControllerClient
184187
cp,
185188
cp.logger,
186189
cp.instanceCache,
190+
cp.virtualNodeCache,
187191
cp.client)
188192

189193
nodeInformer := factory.Core().V1().Nodes()
@@ -268,6 +272,10 @@ func instanceCacheKeyFn(obj interface{}) (string, error) {
268272
return *obj.(*core.Instance).Id, nil
269273
}
270274

275+
func virtualNodeCacheKeyFn(obj interface{}) (string, error) {
276+
return *obj.(*containerengine.VirtualNode).Id, nil
277+
}
278+
271279
func StartOciServiceControllerWrapper(initContext cloudControllerManager.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) cloudControllerManager.InitFunc {
272280
return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) {
273281
return startOciServiceController(ctx, initContext, completedConfig, cloud)

pkg/cloudprovider/providers/oci/instances.go

+99-21
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,19 @@ import (
1919
"fmt"
2020
"net"
2121

22-
"github.com/oracle/oci-go-sdk/v65/core"
23-
"k8s.io/apimachinery/pkg/labels"
24-
25-
"github.com/oracle/oci-cloud-controller-manager/pkg/oci/client"
2622
"github.com/pkg/errors"
2723
api "k8s.io/api/core/v1"
24+
"k8s.io/apimachinery/pkg/labels"
2825
"k8s.io/apimachinery/pkg/types"
2926
cloudprovider "k8s.io/cloud-provider"
27+
28+
"github.com/oracle/oci-cloud-controller-manager/pkg/oci/client"
29+
"github.com/oracle/oci-go-sdk/v65/containerengine"
30+
"github.com/oracle/oci-go-sdk/v65/core"
31+
)
32+
33+
const (
34+
VirtualNodePoolIdAnnotation = "oci.oraclecloud.com/virtual-node-pool-id"
3035
)
3136

3237
var _ cloudprovider.Instances = &CloudProvider{}
@@ -50,7 +55,7 @@ func (cp *CloudProvider) getCompartmentIDByInstanceID(instanceID string) (string
5055
return "", errors.Wrap(err, "error listing all the nodes using node informer")
5156
}
5257
for _, node := range nodeList {
53-
providerID, err := MapProviderIDToInstanceID(node.Spec.ProviderID)
58+
providerID, err := MapProviderIDToResourceID(node.Spec.ProviderID)
5459
if err != nil {
5560
return "", errors.New("Failed to map providerID to instanceID")
5661
}
@@ -146,14 +151,18 @@ func (cp *CloudProvider) NodeAddresses(ctx context.Context, name types.NodeName)
146151
// nodeaddresses are being queried. i.e. local metadata services cannot be used
147152
// in this method to obtain nodeaddresses.
148153
func (cp *CloudProvider) NodeAddressesByProviderID(ctx context.Context, providerID string) ([]api.NodeAddress, error) {
149-
cp.logger.With("instanceID", providerID).Debug("Getting node addresses by provider id")
154+
cp.logger.With("resourceID", providerID).Debug("Getting node addresses by provider id")
150155

151-
instanceID, err := MapProviderIDToInstanceID(providerID)
156+
resourceID, err := MapProviderIDToResourceID(providerID)
152157
if err != nil {
153-
return nil, errors.Wrap(err, "MapProviderIDToInstanceID")
158+
return nil, errors.Wrap(err, "MapProviderIDToResourceOCID")
154159
}
155-
return cp.extractNodeAddresses(ctx, instanceID)
156160

161+
if IsVirtualNodeId(resourceID) {
162+
return []api.NodeAddress{}, nil
163+
}
164+
165+
return cp.extractNodeAddresses(ctx, resourceID)
157166
}
158167

159168
// InstanceID returns the cloud provider ID of the node with the specified NodeName.
@@ -191,21 +200,27 @@ func (cp *CloudProvider) InstanceType(ctx context.Context, name types.NodeName)
191200

192201
// InstanceTypeByProviderID returns the type of the specified instance.
193202
func (cp *CloudProvider) InstanceTypeByProviderID(ctx context.Context, providerID string) (string, error) {
194-
cp.logger.With("instanceID", providerID).Debug("Getting instance type by provider id")
203+
cp.logger.With("resourceID", providerID).Debug("Getting instance type by provider id")
195204

196-
instanceID, err := MapProviderIDToInstanceID(providerID)
205+
resourceID, err := MapProviderIDToResourceID(providerID)
197206
if err != nil {
198-
return "", errors.Wrap(err, "MapProviderIDToInstanceID")
207+
return "", errors.Wrap(err, "MapProviderIDToResourceOCID")
199208
}
200-
item, exists, err := cp.instanceCache.GetByKey(instanceID)
209+
210+
if IsVirtualNodeId(resourceID) {
211+
// Virtual nodes don't have an instance type, return empty string
212+
return "", nil
213+
}
214+
215+
item, exists, err := cp.instanceCache.GetByKey(resourceID)
201216
if err != nil {
202217
return "", errors.Wrap(err, "error fetching instance from instanceCache, will retry")
203218
}
204219
if exists {
205220
return *item.(*core.Instance).Shape, nil
206221
}
207222
cp.logger.Debug("Unable to find the instance information from instanceCache. Calling OCI API")
208-
inst, err := cp.client.Compute().GetInstance(ctx, instanceID)
223+
inst, err := cp.client.Compute().GetInstance(ctx, resourceID)
209224
if err != nil {
210225
return "", errors.Wrap(err, "GetInstance")
211226
}
@@ -232,13 +247,18 @@ func (cp *CloudProvider) CurrentNodeName(ctx context.Context, hostname string) (
232247
// provider id still is running. If false is returned with no error, the
233248
// instance will be immediately deleted by the cloud controller manager.
234249
func (cp *CloudProvider) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
235-
//Please do not try to optimise it by using InstanceCache because we prefer correctness over efficiency here
236-
cp.logger.With("instanceID", providerID).Debug("Checking instance exists by provider id")
237-
instanceID, err := MapProviderIDToInstanceID(providerID)
250+
//Please do not try to optimise it by using Cache because we prefer correctness over efficiency here
251+
cp.logger.With("resourceID", providerID).Debug("Checking instance exists by provider id")
252+
resourceID, err := MapProviderIDToResourceID(providerID)
238253
if err != nil {
239254
return false, err
240255
}
241-
instance, err := cp.client.Compute().GetInstance(ctx, instanceID)
256+
257+
if IsVirtualNodeId(resourceID) {
258+
return cp.virtualNodeExistsByResourceID(ctx, resourceID)
259+
}
260+
261+
instance, err := cp.client.Compute().GetInstance(ctx, resourceID)
242262
if client.IsNotFound(err) {
243263
return false, nil
244264
}
@@ -252,13 +272,18 @@ func (cp *CloudProvider) InstanceExistsByProviderID(ctx context.Context, provide
252272
// InstanceShutdownByProviderID returns true if the instance is shutdown in cloudprovider.
253273
func (cp *CloudProvider) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
254274
//Please do not try to optimise it by using InstanceCache because we prefer correctness over efficiency here
255-
cp.logger.With("instanceID", providerID).Debug("Checking instance is stopped by provider id")
256-
instanceID, err := MapProviderIDToInstanceID(providerID)
275+
cp.logger.With("resourceID", providerID).Debug("Checking instance is stopped by provider id")
276+
resourceID, err := MapProviderIDToResourceID(providerID)
257277
if err != nil {
258278
return false, err
259279
}
260280

261-
instance, err := cp.client.Compute().GetInstance(ctx, instanceID)
281+
if IsVirtualNodeId(resourceID) {
282+
// This does not apply to virtual nodes
283+
return false, nil
284+
}
285+
286+
instance, err := cp.client.Compute().GetInstance(ctx, resourceID)
262287
if err != nil {
263288
return false, err
264289
}
@@ -280,3 +305,56 @@ func (cp *CloudProvider) getCompartmentIDByNodeName(nodeName string) (string, er
280305
cp.logger.Debug("CompartmentID annotation is not present")
281306
return "", errors.New("compartmentID annotation missing in the node. Would retry")
282307
}
308+
309+
func (cp *CloudProvider) getVirtualNodePoolIdByVirtualNodeId(virtualNodeId string) (string, error) {
310+
nodeList, err := cp.NodeLister.List(labels.Everything())
311+
if err != nil {
312+
return "", errors.Wrap(err, "error listing nodes using node informer")
313+
}
314+
for _, node := range nodeList {
315+
resourceID, err := MapProviderIDToResourceID(node.Spec.ProviderID)
316+
if err != nil {
317+
// If providerId is empty ignore this node
318+
continue
319+
}
320+
if virtualNodeId == resourceID {
321+
if virtualNodePoolId, ok := node.Annotations[VirtualNodePoolIdAnnotation]; ok && virtualNodePoolId != "" {
322+
return virtualNodePoolId, nil
323+
}
324+
}
325+
}
326+
return "", errors.Errorf("could not get virtualNodePoolId for virtualNodeId %s, annotation missing", virtualNodeId)
327+
}
328+
329+
func (cp *CloudProvider) virtualNodeExistsByResourceID(ctx context.Context, resourceID string) (bool, error) {
330+
item, exists, err := cp.virtualNodeCache.GetByKey(resourceID)
331+
if err != nil {
332+
return false, errors.Wrap(err, "Error fetching virtual node from virtualNodeCache, will retry")
333+
}
334+
335+
var virtualNodePoolId string
336+
if exists {
337+
virtualNodePoolId = *item.(*containerengine.VirtualNode).VirtualNodePoolId
338+
} else {
339+
virtualNodePoolId, err = cp.getVirtualNodePoolIdByVirtualNodeId(resourceID)
340+
if err != nil {
341+
return false, err
342+
}
343+
}
344+
345+
virtualNode, err := cp.client.ContainerEngine().GetVirtualNode(ctx, resourceID, virtualNodePoolId)
346+
if client.IsNotFound(err) {
347+
return false, nil
348+
}
349+
if err != nil {
350+
return false, err
351+
}
352+
353+
if !exists {
354+
if err := cp.virtualNodeCache.Add(virtualNode); err != nil {
355+
return false, errors.Wrap(err, "Failed to add virtual node in virtualNodeCache")
356+
}
357+
}
358+
359+
return !client.IsVirtualNodeInTerminalState(virtualNode), nil
360+
}

0 commit comments

Comments
 (0)