Skip to content

Commit a028181

Browse files
fix(edgestacks): remove edge stacks even after a system crash or power-off BE-10822 (#208)
1 parent 0f6729f commit a028181

File tree

9 files changed, with 116 additions and 17 deletions

agent.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"time"
66

7+
portainer "github.com/portainer/portainer/api"
78
"github.com/portainer/portainer/api/edge"
89
"github.com/portainer/portainer/pkg/libstack"
910
)
@@ -68,6 +69,11 @@ type (
6869
} `json:"Agent"`
6970
}
7071

72+
EdgeStack struct {
73+
ID int
74+
Name string
75+
}
76+
7177
EdgeMetaFields struct {
7278
// EdgeGroupsIDs - Used for AEEC, the created environment will be added to these edge groups
7379
EdgeGroupsIDs []int
@@ -190,6 +196,7 @@ type (
190196
// WaitForStatus waits until status is reached or an error occurred
191197
// if the received value is an empty string it means the status was reached
192198
WaitForStatus(ctx context.Context, name string, status libstack.Status, options CheckStatusOptions) <-chan libstack.WaitResult
199+
GetEdgeStacks(ctx context.Context) ([]EdgeStack, error)
193200
}
194201

195202
DeployerBaseOptions struct {
@@ -202,7 +209,8 @@ type (
202209

203210
DeployOptions struct {
204211
DeployerBaseOptions
205-
Prune bool
212+
Prune bool
213+
EdgeStackID portainer.EdgeStackID
206214
}
207215

208216
RemoveOptions struct {

edge/poll.go

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package edge
22

33
import (
4+
"context"
45
"encoding/base64"
56
"errors"
67
"math/rand"
@@ -43,6 +44,7 @@ type PollService struct {
4344
tunnelServerAddr string
4445
tunnelServerFingerprint string
4546
tunnelProxy string
47+
firstPoll bool
4648

4749
// Async mode only
4850
pingInterval time.Duration
@@ -99,6 +101,7 @@ func newPollService(edgeManager *Manager, edgeStackManager *stack.StackManager,
99101
tunnelServerFingerprint: config.TunnelServerFingerprint,
100102
tunnelProxy: config.TunnelProxy,
101103
portainerClient: portainerClient,
104+
firstPoll: true,
102105
}
103106

104107
if config.TunnelCapability {
@@ -140,6 +143,7 @@ func (service *PollService) startStatusPollLoop() {
140143
Msg("starting Portainer short-polling client")
141144

142145
lastPollFailed := false
146+
143147
for {
144148
select {
145149
case <-pollCh:
@@ -153,7 +157,7 @@ func (service *PollService) startStatusPollLoop() {
153157

154158
err := service.poll()
155159
if err != nil {
156-
log.Error().Err(err).Msg("an error occured during short poll")
160+
log.Error().Err(err).Msg("an error occurred during short poll")
157161

158162
lastPollFailed = true
159163
service.pollTicker.Reset(time.Duration(service.pollIntervalInSeconds) * time.Second)
@@ -233,9 +237,8 @@ func (service *PollService) poll() error {
233237
Float64("checkin_interval_seconds", environmentStatus.CheckinInterval).
234238
Msg("")
235239

236-
tunnelErr := service.manageUpdateTunnel(*environmentStatus)
237-
if tunnelErr != nil {
238-
return tunnelErr
240+
if err := service.manageUpdateTunnel(*environmentStatus); err != nil {
241+
return err
239242
}
240243

241244
service.processSchedules(environmentStatus.Schedules)
@@ -266,17 +269,15 @@ func (service *PollService) manageUpdateTunnel(environmentStatus client.PollStat
266269
Str("status", environmentStatus.Status).
267270
Msg("idle status detected, shutting down tunnel")
268271

269-
err := service.tunnelClient.CloseTunnel()
270-
if err != nil {
272+
if err := service.tunnelClient.CloseTunnel(); err != nil {
271273
log.Error().Err(err).Msg("unable to shutdown tunnel")
272274
}
273275
}
274276

275277
if environmentStatus.Status == agent.TunnelStatusRequired && !service.tunnelClient.IsTunnelOpen() {
276278
log.Debug().Msg("required status detected, creating reverse tunnel")
277279

278-
err := service.createTunnel(environmentStatus.Credentials, environmentStatus.Port)
279-
if err != nil {
280+
if err := service.createTunnel(environmentStatus.Credentials, environmentStatus.Port); err != nil {
280281
log.Error().Err(err).Msg("unable to create tunnel")
281282

282283
return err
@@ -306,8 +307,7 @@ func (service *PollService) createTunnel(encodedCredentials string, remotePort i
306307
RemotePort: strconv.Itoa(remotePort),
307308
}
308309

309-
err = service.tunnelClient.CreateTunnel(tunnelConfig)
310-
if err != nil {
310+
if err := service.tunnelClient.CreateTunnel(tunnelConfig); err != nil {
311311
return err
312312
}
313313

@@ -316,24 +316,32 @@ func (service *PollService) createTunnel(encodedCredentials string, remotePort i
316316
}
317317

318318
func (service *PollService) processSchedules(schedules []agent.Schedule) {
319-
err := service.scheduleManager.Schedule(schedules)
320-
if err != nil {
319+
if err := service.scheduleManager.Schedule(schedules); err != nil {
321320
log.Error().Err(err).Msg("an error occurred during schedule management")
322321
}
323322
}
324323

325324
func (service *PollService) processStacks(pollResponseStacks []client.StackStatus) error {
326-
if pollResponseStacks == nil {
327-
return nil
325+
// Load existing edge stacks so they can be removed using the initial poll response
326+
if service.firstPoll {
327+
log.Info().Msg("loading the existing edge stacks")
328+
329+
ctx, cancelFn := context.WithTimeout(context.Background(), time.Minute)
330+
defer cancelFn()
331+
332+
if err := service.edgeStackManager.LoadExistingEdgeStacks(ctx); err == nil {
333+
service.firstPoll = false
334+
} else {
335+
log.Warn().Err(err).Msg("unable to retrieve the existing edge stacks")
336+
}
328337
}
329338

330339
stacks := map[int]client.StackStatus{}
331340
for _, s := range pollResponseStacks {
332341
stacks[s.ID] = s
333342
}
334343

335-
err := service.edgeStackManager.UpdateStacksStatus(stacks)
336-
if err != nil {
344+
if err := service.edgeStackManager.UpdateStacksStatus(stacks); err != nil {
337345
log.Error().Err(err).Msg("an error occurred during stack management")
338346

339347
return err

edge/stack/manager.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
package stack
22

33
import (
4+
"context"
45
"fmt"
56
"sync"
67

78
"github.com/portainer/agent"
89
"github.com/portainer/agent/edge/client"
910
"github.com/portainer/agent/exec"
1011
"github.com/portainer/agent/kubernetes"
12+
"github.com/portainer/portainer/api/edge"
13+
1114
"github.com/rs/zerolog/log"
1215
)
1316

@@ -121,6 +124,33 @@ func (manager *StackManager) SetEngineType(engineTyp engineType) error {
121124
return nil
122125
}
123126

127+
// LoadExistingEdgeStacks loads all the edge stacks deployed by Portainer
128+
func (manager *StackManager) LoadExistingEdgeStacks(ctx context.Context) error {
129+
edgeStacks, err := manager.deployer.GetEdgeStacks(ctx)
130+
if err != nil {
131+
return err
132+
}
133+
134+
manager.mu.Lock()
135+
for _, s := range edgeStacks {
136+
if _, found := manager.stacks[edgeStackID(s.ID)]; found {
137+
continue
138+
}
139+
140+
manager.stacks[edgeStackID(s.ID)] = &edgeStack{
141+
StackPayload: edge.StackPayload{
142+
ID: s.ID,
143+
Name: s.Name,
144+
},
145+
Action: actionIdle,
146+
Status: StatusPending,
147+
}
148+
}
149+
manager.mu.Unlock()
150+
151+
return nil
152+
}
153+
124154
func (manager *StackManager) buildDeployerService(assetsPath string, engineStatus engineType) (agent.Deployer, error) {
125155
switch engineStatus {
126156
case EngineTypeDockerStandalone:

edge/stack/stack.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,7 @@ func (manager *StackManager) deployStack(ctx context.Context, stack *edgeStack,
547547
Env: envVars,
548548
Registries: manager.ensureRegCreds(stack),
549549
},
550+
EdgeStackID: portainer.EdgeStackID(stack.ID),
550551
},
551552
)
552553
manager.mu.Lock()

edge/stack/stack_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ func TestStackManager_deployStack(t *testing.T) {
139139

140140
mockPortainerClient.EXPECT().SetEdgeStackStatus(stack.ID, portainer.EdgeStackStatusDeploying, stack.RollbackTo, "").Return(nil)
141141
mockDeployer.EXPECT().Deploy(ctx, stackName, []string{stackFileLocation}, agent.DeployOptions{
142+
EdgeStackID: portainer.EdgeStackID(stack.ID),
142143
DeployerBaseOptions: agent.DeployerBaseOptions{
143144
Namespace: stack.Namespace,
144145
WorkingDir: stack.FileFolder,
@@ -175,6 +176,7 @@ func TestStackManager_deployStack(t *testing.T) {
175176

176177
mockPortainerClient.EXPECT().SetEdgeStackStatus(stack.ID, portainer.EdgeStackStatusDeploying, stack.RollbackTo, "").Return(nil)
177178
mockDeployer.EXPECT().Deploy(ctx, stackName, []string{stackFileLocation}, agent.DeployOptions{
179+
EdgeStackID: portainer.EdgeStackID(stack.ID),
178180
DeployerBaseOptions: agent.DeployerBaseOptions{
179181
Namespace: stack.Namespace,
180182
WorkingDir: stack.FileFolder,
@@ -210,6 +212,7 @@ func TestStackManager_deployStack(t *testing.T) {
210212

211213
mockPortainerClient.EXPECT().SetEdgeStackStatus(stack.ID, portainer.EdgeStackStatusDeploying, stack.RollbackTo, "").Return(nil)
212214
mockDeployer.EXPECT().Deploy(ctx, stackName, []string{stackFileLocation}, agent.DeployOptions{
215+
EdgeStackID: portainer.EdgeStackID(stack.ID),
213216
DeployerBaseOptions: agent.DeployerBaseOptions{
214217
Namespace: stack.Namespace,
215218
WorkingDir: stack.FileFolder,

exec/docker_compose_stack.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package exec
22

33
import (
44
"context"
5+
"strings"
56

67
"github.com/docker/cli/cli/config/types"
78
"github.com/portainer/agent"
@@ -10,6 +11,8 @@ import (
1011
"github.com/portainer/portainer/pkg/libstack/compose"
1112
)
1213

14+
var _ agent.Deployer = &DockerComposeStackService{}
15+
1316
// DockerComposeStackService represents a service for managing stacks by using the Docker binary.
1417
type DockerComposeStackService struct {
1518
deployer libstack.Deployer
@@ -33,6 +36,7 @@ func (service *DockerComposeStackService) Deploy(ctx context.Context, name strin
3336
Registries: registryCredsToAuthConfigs(options.Registries),
3437
},
3538
RemoveOrphans: options.Prune,
39+
EdgeStackID: options.EdgeStackID,
3640
})
3741
}
3842

@@ -69,6 +73,24 @@ func (service *DockerComposeStackService) WaitForStatus(ctx context.Context, nam
6973
return service.deployer.WaitForStatus(ctx, name, status)
7074
}
7175

76+
func (service *DockerComposeStackService) GetEdgeStacks(ctx context.Context) ([]agent.EdgeStack, error) {
77+
var r []agent.EdgeStack
78+
79+
edgeStacks, err := service.deployer.GetExistingEdgeStacks(ctx)
80+
if err != nil {
81+
return nil, err
82+
}
83+
84+
for _, s := range edgeStacks {
85+
// Remove the prefix because it will get added back by the stack manager
86+
s.Name = strings.TrimPrefix(s.Name, "edge_")
87+
88+
r = append(r, agent.EdgeStack(s))
89+
}
90+
91+
return r, nil
92+
}
93+
7294
func registryCredsToAuthConfigs(registryCreds []edge.RegistryCredentials) []types.AuthConfig {
7395
var authConfigs []types.AuthConfig
7496

exec/docker_swarm_stack.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import (
1111
"github.com/portainer/portainer/pkg/libstack/compose"
1212
)
1313

14+
var _ agent.Deployer = &DockerSwarmStackService{}
15+
1416
// DockerSwarmStackService represents a service for managing stacks by using the Docker binary.
1517
type DockerSwarmStackService struct {
1618
command string
@@ -90,3 +92,7 @@ func (service *DockerSwarmStackService) Remove(ctx context.Context, name string,
9092

9193
return err
9294
}
95+
96+
func (service *DockerSwarmStackService) GetEdgeStacks(ctx context.Context) ([]agent.EdgeStack, error) {
97+
return nil, nil
98+
}

exec/kubernetes_deploy.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import (
1212
"github.com/portainer/agent/kubernetes"
1313
)
1414

15+
var _ agent.Deployer = &KubernetesDeployer{}
16+
1517
// KubernetesDeployer represents a service to deploy resources inside a Kubernetes environment.
1618
type KubernetesDeployer struct {
1719
command string
@@ -90,6 +92,10 @@ func (deployer *KubernetesDeployer) DeployRawConfig(token, config string, namesp
9092
return runCommandAndCaptureStdErr(deployer.command, args, &cmdOpts{Input: config})
9193
}
9294

95+
func (service *KubernetesDeployer) GetEdgeStacks(ctx context.Context) ([]agent.EdgeStack, error) {
96+
return nil, nil
97+
}
98+
9399
type argOptions struct {
94100
Namespace string
95101
Token string

internals/mocks/mock_agent.go

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)