Skip to content

Commit d502d77

Browse files
committed
Merge branch '546-diagnostic-log-bloating' into 'master'
fix: stop bloating diagnostic logs (#546) Closes #546 See merge request postgres-ai/database-lab!827
2 parents d8b0c81 + 711a646 commit d502d77

File tree

7 files changed

+99
-16
lines changed

7 files changed

+99
-16
lines changed

Diff for: engine/.gitlab-ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ lint:
5656
### Build binary.
5757
build-binary-alpine:
5858
<<: *only_engine
59-
image: golang:1.18-alpine
59+
image: golang:1.20-alpine
6060
stage: build-binary
6161
artifacts:
6262
paths:

Diff for: engine/internal/diagnostic/logs.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ func CollectDiagnostics(ctx context.Context, client *client.Client, filterArgs f
7272
}
7373

7474
// CollectContainerDiagnostics collects diagnostic information for a specific container.
75-
func CollectContainerDiagnostics(ctx context.Context, client *client.Client, containerName string) {
75+
func CollectContainerDiagnostics(ctx context.Context, client *client.Client, containerName, dbDataDir string) {
7676
diagnosticsDir, err := util.GetLogsPath(time.Now().Format(timeFormat))
7777

7878
if err != nil {
@@ -89,6 +89,12 @@ func CollectContainerDiagnostics(ctx context.Context, client *client.Client, con
8989
if err != nil {
9090
log.Warn("Failed to collect container logs ", containerName, err)
9191
}
92+
93+
err = collectPostgresLogs(ctx, client, diagnosticsDir, containerName, dbDataDir)
94+
95+
if err != nil {
96+
log.Warn("Failed to collect Postgres logs ", containerName, err)
97+
}
9298
}
9399

94100
func collectContainersOutput(ctx context.Context, client *client.Client, diagnosticDir string, filterArgs filters.Args) error {

Diff for: engine/internal/provision/databases/postgres/postgres.go

+5-12
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import (
1313
"strings"
1414
"time"
1515

16-
"github.com/docker/docker/api/types/filters"
1716
"github.com/docker/docker/client"
1817

1918
_ "github.com/lib/pq" // Register Postgres database driver.
@@ -61,6 +60,8 @@ func Start(r runners.Runner, c *resources.AppConfig) error {
6160
return errors.Wrap(err, "failed to run container")
6261
}
6362

63+
log.Dbg("Container has been started. Running Postgres...")
64+
6465
// Waiting for server to become ready and promote if needed.
6566
first := true
6667
cnt := 0
@@ -112,7 +113,7 @@ func Start(r runners.Runner, c *resources.AppConfig) error {
112113
cnt++
113114

114115
if cnt > waitPostgresTimeout {
115-
err := collectDiagnostics(c)
116+
collectDiagnostics(c)
116117

117118
if runnerErr := Stop(r, c.Pool, c.CloneName); runnerErr != nil {
118119
log.Err(runnerErr)
@@ -127,21 +128,13 @@ func Start(r runners.Runner, c *resources.AppConfig) error {
127128
return nil
128129
}
129130

130-
func collectDiagnostics(c *resources.AppConfig) error {
131+
func collectDiagnostics(c *resources.AppConfig) {
131132
dockerClient, err := client.NewClientWithOpts(client.FromEnv)
132133
if err != nil {
133134
log.Fatal("Failed to create a Docker client:", err)
134135
}
135136

136-
filterArgs := filters.NewArgs(
137-
filters.KeyValuePair{Key: "label",
138-
Value: fmt.Sprintf("%s=%s", docker.LabelClone, c.Pool.Name)})
139-
140-
if err := diagnostic.CollectDiagnostics(context.Background(), dockerClient, filterArgs, c.CloneName, c.DataDir()); err != nil {
141-
log.Err("Failed to collect container diagnostics", err)
142-
}
143-
144-
return err
137+
diagnostic.CollectContainerDiagnostics(context.Background(), dockerClient, c.CloneName, c.DataDir())
145138
}
146139

147140
// Stop stops Postgres instance.

Diff for: engine/internal/provision/mode_local.go

+9
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"gitlab.com/postgres-ai/database-lab/v3/internal/provision/resources"
3232
"gitlab.com/postgres-ai/database-lab/v3/internal/provision/runners"
3333
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/engine/postgres/tools"
34+
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/engine/postgres/tools/fs"
3435
"gitlab.com/postgres-ai/database-lab/v3/pkg/log"
3536
"gitlab.com/postgres-ai/database-lab/v3/pkg/models"
3637
"gitlab.com/postgres-ai/database-lab/v3/pkg/util"
@@ -188,6 +189,10 @@ func (p *Provisioner) StartSession(snapshotID string, user resources.EphemeralUs
188189
appConfig := p.getAppConfig(fsm.Pool(), name, port)
189190
appConfig.SetExtraConf(extraConfig)
190191

192+
if err := fs.CleanupLogsDir(appConfig.DataDir()); err != nil {
193+
log.Warn("Failed to clean up logs directory:", err.Error())
194+
}
195+
191196
if err = postgres.Start(p.runner, appConfig); err != nil {
192197
return nil, errors.Wrap(err, "failed to start a container")
193198
}
@@ -284,6 +289,10 @@ func (p *Provisioner) ResetSession(session *resources.Session, snapshotID string
284289
appConfig := p.getAppConfig(newFSManager.Pool(), name, session.Port)
285290
appConfig.SetExtraConf(session.ExtraConfig)
286291

292+
if err := fs.CleanupLogsDir(appConfig.DataDir()); err != nil {
293+
log.Warn("Failed to clean up logs directory:", err.Error())
294+
}
295+
287296
if err = postgres.Start(p.runner, appConfig); err != nil {
288297
return nil, errors.Wrap(err, "failed to start container")
289298
}

Diff for: engine/internal/retrieval/engine/postgres/snapshot/physical.go

+50-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"context"
1212
"fmt"
1313
"io"
14+
"os"
1415
"path"
1516
"strings"
1617
"sync"
@@ -37,6 +38,7 @@ import (
3738
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/engine/postgres/tools/activity"
3839
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/engine/postgres/tools/cont"
3940
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/engine/postgres/tools/defaults"
41+
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/engine/postgres/tools/fs"
4042
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/engine/postgres/tools/health"
4143
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/engine/postgres/tools/pgtool"
4244
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/engine/postgres/tools/query"
@@ -65,6 +67,9 @@ const (
6567
// WAL parsing constants.
6668
walNameLen = 24
6769
pgVersion10 = 10
70+
71+
logDirName = "log"
72+
defaultLogRetentionDays = 7
6873
)
6974

7075
var defaultRecoveryCfg = map[string]string{
@@ -351,6 +356,11 @@ func (p *PhysicalInitial) run(ctx context.Context) (err error) {
351356
return errors.Wrapf(err, "failed to create \"pre\" clone %s", cloneName)
352357
}
353358

359+
cloneDataDir := path.Join(p.fsPool.ClonesDir(), cloneName, p.fsPool.DataSubDir)
360+
if err := fs.CleanupLogsDir(cloneDataDir); err != nil {
361+
log.Warn("Failed to clean up logs directory:", err.Error())
362+
}
363+
354364
defer func() {
355365
if err != nil {
356366
if errDestroy := p.cloneManager.DestroyClone(cloneName); errDestroy != nil {
@@ -361,7 +371,7 @@ func (p *PhysicalInitial) run(ctx context.Context) (err error) {
361371

362372
// Promotion.
363373
if p.options.Promotion.Enabled {
364-
if err := p.promoteInstance(ctx, path.Join(p.fsPool.ClonesDir(), cloneName, p.fsPool.DataSubDir), syState); err != nil {
374+
if err := p.promoteInstance(ctx, cloneDataDir, syState); err != nil {
365375
return errors.Wrap(err, "failed to promote instance")
366376
}
367377
}
@@ -387,6 +397,45 @@ func (p *PhysicalInitial) run(ctx context.Context) (err error) {
387397

388398
p.tm.SendEvent(ctx, telemetry.SnapshotCreatedEvent, telemetry.SnapshotCreated{})
389399

400+
if err := p.cleanupOldLogs(); err != nil {
401+
log.Warn("cannot clean up old logs", err.Error())
402+
}
403+
404+
return nil
405+
}
406+
407+
func (p *PhysicalInitial) cleanupOldLogs() error {
408+
lastWeekTime := time.Now().AddDate(0, 0, -1*defaultLogRetentionDays)
409+
410+
log.Dbg("Cleaning up PGDATA logs older than", lastWeekTime.Format(time.DateTime))
411+
412+
logDir := path.Join(p.fsPool.DataDir(), logDirName)
413+
414+
dirEntries, err := os.ReadDir(logDir)
415+
if err != nil {
416+
return err
417+
}
418+
419+
var fileCounter int
420+
421+
for _, logFile := range dirEntries {
422+
info, err := logFile.Info()
423+
if err != nil {
424+
continue
425+
}
426+
427+
if info.ModTime().Before(lastWeekTime) {
428+
logFilename := path.Join(logDir, logFile.Name())
429+
if err := os.RemoveAll(logFilename); err != nil {
430+
log.Warn("cannot remove old log file %s: %s", logFilename, err.Error())
431+
}
432+
433+
fileCounter++
434+
}
435+
}
436+
437+
log.Dbg("Old PGDATA logs have been cleaned. Number of deleted files: ", fileCounter)
438+
390439
return nil
391440
}
392441

Diff for: engine/internal/retrieval/engine/postgres/tools/fs/tools.go

+25
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,17 @@
66
package fs
77

88
import (
9+
"fmt"
910
"io"
1011
"os"
12+
"path"
1113
"path/filepath"
1214
)
1315

16+
const (
17+
logDirectory = "log"
18+
)
19+
1420
// CopyDirectoryContent copies all files from one directory to another.
1521
func CopyDirectoryContent(sourceDir, dataDir string) error {
1622
entries, err := os.ReadDir(sourceDir)
@@ -73,3 +79,22 @@ func AppendFile(file string, data []byte) error {
7379

7480
return nil
7581
}
82+
83+
// CleanupLogsDir removes old log files from the clone directory.
84+
func CleanupLogsDir(dataDir string) error {
85+
logPath := path.Join(dataDir, logDirectory)
86+
87+
logDir, err := os.ReadDir(logPath)
88+
if err != nil {
89+
return fmt.Errorf("cannot read directory %s: %v", logPath, err.Error())
90+
}
91+
92+
for _, logFile := range logDir {
93+
logName := path.Join(logPath, logFile.Name())
94+
if err := os.RemoveAll(logName); err != nil {
95+
return fmt.Errorf("cannot remove %s: %v", logName, err.Error())
96+
}
97+
}
98+
99+
return nil
100+
}

Diff for: engine/internal/retrieval/engine/postgres/tools/tools.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"time"
2121

2222
"github.com/AlekSi/pointer"
23+
"github.com/ahmetalpbalkan/dlog"
2324
"github.com/docker/cli/cli/streams"
2425
"github.com/docker/docker/api/types"
2526
"github.com/docker/docker/api/types/container"
@@ -682,7 +683,7 @@ func CopyContainerLogs(ctx context.Context, docker *client.Client, containerName
682683
}
683684
}()
684685

685-
if _, err := io.Copy(writeFile, reader); err != nil {
686+
if _, err := io.Copy(writeFile, dlog.NewReader(reader)); err != nil {
686687
return fmt.Errorf("failed to copy container output %w", err)
687688
}
688689

0 commit comments

Comments
 (0)