@@ -298,6 +298,14 @@ func (p *PhysicalInitial) run(ctx context.Context) (err error) {
298
298
}
299
299
}()
300
300
301
+ var syncErr error
302
+
303
+ if p .options .Promotion .Enabled {
304
+ if syncErr = p .checkSyncInstance (ctx ); syncErr != nil {
305
+ log .Dbg (fmt .Sprintf ("failed to check the sync instance before snapshotting: %v" , syncErr ), "Changing the promotion strategy" )
306
+ }
307
+ }
308
+
301
309
// Prepare pre-snapshot.
302
310
snapshotName , err := p .cloneManager .CreateSnapshot ("" , preDataStateAt + pre )
303
311
if err != nil {
@@ -326,7 +334,7 @@ func (p *PhysicalInitial) run(ctx context.Context) (err error) {
326
334
327
335
// Promotion.
328
336
if p .options .Promotion .Enabled {
329
- if err := p .promoteInstance (ctx , path .Join (p .fsPool .ClonesDir (), cloneName , p .fsPool .DataSubDir )); err != nil {
337
+ if err := p .promoteInstance (ctx , path .Join (p .fsPool .ClonesDir (), cloneName , p .fsPool .DataSubDir ), syncErr ); err != nil {
330
338
return errors .Wrap (err , "failed to promote instance" )
331
339
}
332
340
}
@@ -353,6 +361,29 @@ func (p *PhysicalInitial) run(ctx context.Context) (err error) {
353
361
return nil
354
362
}
355
363
364
+ func (p * PhysicalInitial ) checkSyncInstance (ctx context.Context ) error {
365
+ syncContainer , err := p .dockerClient .ContainerInspect (ctx , p .syncInstanceName ())
366
+ if err != nil {
367
+ return err
368
+ }
369
+
370
+ if err := tools .CheckContainerReadiness (ctx , p .dockerClient , syncContainer .ID ); err != nil {
371
+ return errors .Wrap (err , "failed to readiness check" )
372
+ }
373
+
374
+ log .Msg ("Sync instance has been checked. It is running" )
375
+
376
+ if err := p .checkpoint (ctx , syncContainer .ID ); err != nil {
377
+ return errors .Wrap (err , "failed to make a checkpoint for sync instance" )
378
+ }
379
+
380
+ return nil
381
+ }
382
+
383
+ func (p * PhysicalInitial ) syncInstanceName () string {
384
+ return cont .SyncInstanceContainerPrefix + p .globalCfg .InstanceID
385
+ }
386
+
356
387
func (p * PhysicalInitial ) startScheduler (ctx context.Context ) {
357
388
if p .scheduler == nil || ! p .hasSchedulingOptions () {
358
389
return
@@ -409,7 +440,7 @@ func (p *PhysicalInitial) promoteContainerName() string {
409
440
return promoteContainerPrefix + p .globalCfg .InstanceID
410
441
}
411
442
412
- func (p * PhysicalInitial ) promoteInstance (ctx context.Context , clonePath string ) (err error ) {
443
+ func (p * PhysicalInitial ) promoteInstance (ctx context.Context , clonePath string , syncErr error ) (err error ) {
413
444
p .promotionMutex .Lock ()
414
445
defer p .promotionMutex .Unlock ()
415
446
@@ -436,10 +467,17 @@ func (p *PhysicalInitial) promoteInstance(ctx context.Context, clonePath string)
436
467
}
437
468
}
438
469
439
- recoveryConfig := buildRecoveryConfig (recoveryFileConfig , p .options .Promotion .Recovery )
470
+ recoveryConfig := make (map [string ]string )
471
+
472
+ // Item 5. Remove a recovery file: https://gitlab.com/postgres-ai/database-lab/-/issues/236#note_513401256
473
+ if syncErr != nil {
474
+ recoveryConfig = buildRecoveryConfig (recoveryFileConfig , p .options .Promotion .Recovery )
440
475
441
- if err := cfgManager .ApplyRecovery (recoveryFileConfig ); err != nil {
442
- return errors .Wrap (err , "failed to apply recovery configuration" )
476
+ if err := cfgManager .ApplyRecovery (recoveryFileConfig ); err != nil {
477
+ return errors .Wrap (err , "failed to apply recovery configuration" )
478
+ }
479
+ } else if err := cfgManager .RemoveRecoveryConfig (); err != nil {
480
+ log .Err (errors .Wrap (err , "failed to remove recovery config file" ))
443
481
}
444
482
445
483
// Apply promotion configs.
@@ -557,6 +595,13 @@ func (p *PhysicalInitial) promoteInstance(ctx context.Context, clonePath string)
557
595
return errors .Wrap (err , "failed to store prepared configuration" )
558
596
}
559
597
598
+ const pgStopTimeout = 600
599
+
600
+ if err := tools .StopPostgres (ctx , p .dockerClient , promoteCont .ID , clonePath , pgStopTimeout ); err != nil {
601
+ log .Msg ("Failed to stop Postgres" , err )
602
+ tools .PrintContainerLogs (ctx , p .dockerClient , promoteCont .ID )
603
+ }
604
+
560
605
return nil
561
606
}
562
607
0 commit comments