Skip to content

Commit 405e5ad

Browse files
authored
Maint: handle degraded api when creating k8sclient (#426)
* Maint: handle degraded API when creating k8sclient
* Review fixes
* Use backplane general error string
* Fix linter errors
1 parent 6ee523c commit 405e5ad

File tree

7 files changed

+103
-21
lines changed

7 files changed

+103
-21
lines changed

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ require (
2222
github.com/prometheus/client_golang v1.22.0
2323
github.com/prometheus/common v0.63.0
2424
github.com/spf13/cobra v1.8.1
25+
github.com/stretchr/testify v1.10.0
2526
go.uber.org/mock v0.4.0
2627
go.uber.org/zap v1.27.0
2728
gopkg.in/yaml.v2 v2.4.0
@@ -122,6 +123,7 @@ require (
122123
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
123124
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect
124125
github.com/pkg/errors v0.9.1 // indirect
126+
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
125127
github.com/prometheus/client_model v0.6.1 // indirect
126128
github.com/prometheus/procfs v0.15.1 // indirect
127129
github.com/sagikazarmark/locafero v0.7.0 // indirect

pkg/investigations/clustermonitoringerrorbudgetburn/clustermonitoringerrorbudgetburn.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ func (c *Investigation) Run(r *investigation.Resources) (result investigation.In
3636
// patching the existing RBAC etc...
3737
k8scli, err := k8sclient.New(r.Cluster.ID(), r.OcmClient, r.Name)
3838
if err != nil {
39+
if errors.Is(err, k8sclient.ErrAPIServerUnavailable) {
40+
return result, r.PdClient.EscalateIncidentWithNote("CAD was unable to access cluster's kube-api. Please investigate manually.")
41+
}
42+
3943
return result, fmt.Errorf("unable to initialize k8s cli: %w", err)
4044
}
4145
defer func() {

pkg/investigations/insightsoperatordown/insightsoperatordown.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ func (c *Investigation) Run(r *investigation.Resources) (investigation.Investiga
3838
// We continue with the next step OCPBUG22226 even if the user is banned.
3939
k8scli, err := k8sclient.New(r.Cluster.ID(), r.OcmClient, r.Name)
4040
if err != nil {
41+
if errors.Is(err, k8sclient.ErrAPIServerUnavailable) {
42+
return result, r.PdClient.EscalateIncidentWithNote("CAD was unable to access cluster's kube-api. Please investigate manually.")
43+
}
44+
4145
return result, fmt.Errorf("unable to initialize k8s cli: %w", err)
4246
}
4347
defer func() {

pkg/k8s/client.go

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,29 @@ import (
44
"fmt"
55
"os"
66

7-
configv1 "github.com/openshift/api/config/v1"
87
"github.com/openshift/backplane-cli/pkg/cli/config"
98
bpremediation "github.com/openshift/backplane-cli/pkg/remediation"
109
"github.com/openshift/configuration-anomaly-detection/pkg/ocm"
11-
corev1 "k8s.io/api/core/v1"
12-
"k8s.io/apimachinery/pkg/runtime"
1310
"sigs.k8s.io/controller-runtime/pkg/client"
1411
)
1512

13+
// New returns a Kubernetes client for the given cluster scoped to a given remediation's permissions.
1614
func New(clusterID string, ocmClient ocm.Client, remediation string) (client.Client, error) {
1715
backplaneURL := os.Getenv("BACKPLANE_URL")
1816
if backplaneURL == "" {
1917
return nil, fmt.Errorf("could not create new k8sclient: missing environment variable BACKPLANE_URL")
2018
}
2119

22-
cfg, err := bpremediation.CreateRemediationWithConn(config.BackplaneConfiguration{URL: backplaneURL}, ocmClient.GetConnection(), clusterID, remediation)
20+
cfg, err := bpremediation.CreateRemediationWithConn(
21+
config.BackplaneConfiguration{URL: backplaneURL},
22+
ocmClient.GetConnection(),
23+
clusterID,
24+
remediation,
25+
)
2326
if err != nil {
27+
if isAPIServerUnavailable(err) {
28+
return nil, fmt.Errorf("%w: %w", ErrAPIServerUnavailable, err)
29+
}
2430
return nil, err
2531
}
2632

@@ -32,26 +38,17 @@ func New(clusterID string, ocmClient ocm.Client, remediation string) (client.Cli
3238
return client.New(cfg, client.Options{Scheme: scheme})
3339
}
3440

41+
// Cleanup removes the remediation created for the cluster.
3542
func Cleanup(clusterID string, ocmClient ocm.Client, remediation string) error {
3643
backplaneURL := os.Getenv("BACKPLANE_URL")
3744
if backplaneURL == "" {
38-
return fmt.Errorf("could not create new k8sclient: missing environment variable BACKPLANE_URL")
45+
return fmt.Errorf("could not clean up k8sclient: missing environment variable BACKPLANE_URL")
3946
}
40-
return bpremediation.DeleteRemediationWithConn(config.BackplaneConfiguration{URL: backplaneURL}, ocmClient.GetConnection(), clusterID, remediation)
41-
}
42-
43-
// Initialize all apis we need in CAD
44-
func initScheme() (*runtime.Scheme, error) {
45-
scheme := runtime.NewScheme()
4647

47-
// Add corev1 to scheme for core k8s
48-
if err := corev1.AddToScheme(scheme); err != nil {
49-
return nil, fmt.Errorf("unable to add corev1 scheme: %w", err)
50-
}
51-
52-
// Add config.openshift.io/v1 to scheme for clusteroperator
53-
if err := configv1.Install(scheme); err != nil {
54-
return nil, fmt.Errorf("unable to add openshift/api/config scheme: %w", err)
55-
}
56-
return scheme, nil
48+
return bpremediation.DeleteRemediationWithConn(
49+
config.BackplaneConfiguration{URL: backplaneURL},
50+
ocmClient.GetConnection(),
51+
clusterID,
52+
remediation,
53+
)
5754
}

pkg/k8s/errors.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package k8sclient
2+
3+
import (
4+
"errors"
5+
"strings"
6+
)
7+
8+
// ErrAPIServerUnavailable is the sentinel error that New wraps around the
// underlying failure when isAPIServerUnavailable recognizes it. Callers detect
// it with errors.Is.
var ErrAPIServerUnavailable = errors.New("kubernetes API server unavailable")
9+
10+
// isAPIServerUnavailable reports whether err looks like the symptom of an
// unreachable API server.
//
// Detection is a substring match on err.Error() for the message
// "The cluster could be down or under heavy load"; wrapped errors match as
// long as the message is still part of the rendered text.
//
// A nil err is never considered an unavailability symptom and returns false
// instead of panicking on the Error() call.
func isAPIServerUnavailable(err error) bool {
	if err == nil {
		return false
	}

	return strings.Contains(err.Error(), "The cluster could be down or under heavy load")
}

pkg/k8s/errors_test.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package k8sclient
2+
3+
import (
4+
"errors"
5+
"testing"
6+
)
7+
8+
func TestIsAPIServerUnavailable(t *testing.T) {
9+
tests := []struct {
10+
name string
11+
err error
12+
expected bool
13+
}{
14+
{
15+
name: "Cluster down message present",
16+
err: errors.New(`Error: Internal error occurred: failed calling webhook "namespace.operator.tekton.dev": failed to call webhook: Post "https://tekton-operator-proxy-webhook.openshift-pipelines.svc:443/namespace-validation?timeout=10s": context deadline exceeded
17+
The cluster could be down or under heavy load
18+
`),
19+
expected: true,
20+
},
21+
{
22+
name: "Unrelated error message",
23+
err: errors.New("some other error occurred"),
24+
expected: false,
25+
},
26+
}
27+
28+
for _, tt := range tests {
29+
t.Run(tt.name, func(t *testing.T) {
30+
if tt.err == nil && isAPIServerUnavailable(tt.err) {
31+
t.Errorf("Expected false for nil error, but got true")
32+
} else if tt.err != nil && isAPIServerUnavailable(tt.err) != tt.expected {
33+
t.Errorf("For test '%s', expected %v, got %v", tt.name, tt.expected, !tt.expected)
34+
}
35+
})
36+
}
37+
}

pkg/k8s/scheme.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package k8sclient
2+
3+
import (
4+
"fmt"
5+
6+
configv1 "github.com/openshift/api/config/v1"
7+
corev1 "k8s.io/api/core/v1"
8+
"k8s.io/apimachinery/pkg/runtime"
9+
)
10+
11+
// initScheme initializes the runtime scheme with required APIs.
12+
func initScheme() (*runtime.Scheme, error) {
13+
scheme := runtime.NewScheme()
14+
15+
if err := corev1.AddToScheme(scheme); err != nil {
16+
return nil, fmt.Errorf("unable to add corev1 scheme: %w", err)
17+
}
18+
19+
if err := configv1.Install(scheme); err != nil {
20+
return nil, fmt.Errorf("unable to add config.openshift.io/v1 scheme: %w", err)
21+
}
22+
23+
return scheme, nil
24+
}

0 commit comments

Comments
 (0)