Skip to content

Commit cd26e01

Browse files
authored
Merge pull request #265 from form3tech-oss/nvloff-stuck-iterations
fix: exit after a timeout on stuck iterations
2 parents 105ce68 + b43bfb9 commit cd26e01

File tree

5 files changed

+166
-83
lines changed

5 files changed

+166
-83
lines changed

internal/run/run_cmd.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ import (
1616
"github.com/form3tech-oss/f1/v2/pkg/f1/scenarios"
1717
)
1818

19+
const waitForCompletionTimeout = 10 * time.Second
20+
1921
func Cmd(
2022
s *scenarios.Scenarios,
2123
builders []api.Builder,
@@ -157,7 +159,7 @@ func runCmdExecute(
157159
MaxFailures: maxFailures,
158160
MaxFailuresRate: maxFailuresRate,
159161
IgnoreDropped: ignoreDropped,
160-
}, s, trig, settings, metricsInstance, output)
162+
}, s, trig, waitForCompletionTimeout, settings, metricsInstance, output)
161163
if err != nil {
162164
return fmt.Errorf("new run: %w", err)
163165
}

internal/run/run_cmd_test.go

+62-1
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,68 @@ func TestInterruptedRun(t *testing.T) {
563563
then.
564564
setup_teardown_is_called_within(600 * time.Millisecond).and().
565565
metrics_are_pushed_to_prometheus().and().
566-
there_is_a_metric_called("form3_loadtest_iteration")
566+
there_is_a_metric_called("form3_loadtest_iteration").and().
567+
expect_the_stdout_output_to_include([]string{
568+
"Interrupted - waiting for active tests to complete",
569+
})
570+
}
571+
572+
func TestInterruptedRun_TimesOut(t *testing.T) {
573+
t.Parallel()
574+
575+
given, when, then := NewRunTestStage(t)
576+
577+
given.
578+
a_timer_is_started().
579+
a_rate_of("1/1s").and().
580+
a_duration_of(5 * time.Second).and().
581+
a_scenario_where_each_iteration_takes(3 * time.Second).and().
582+
wait_for_completion_timeout_of(1 * time.Second).and().
583+
a_distribution_type("none")
584+
585+
when.
586+
the_run_command_is_executed_and_cancelled_after(500 * time.Millisecond)
587+
588+
then.
589+
setup_teardown_is_called_within(600*time.Millisecond + 1*time.Second).and().
590+
metrics_are_pushed_to_prometheus().and().
591+
expect_the_stdout_output_to_include([]string{
592+
"Interrupted - waiting for active tests to complete",
593+
"Active tests not completed after 1s. Stopping...",
594+
})
595+
596+
// needed for goroutine checker
597+
// sleep is the time left in the interation duration after the timeout
598+
time.Sleep(3 * time.Second)
599+
}
600+
601+
func TestMaxDurationReached_TimesOut(t *testing.T) {
602+
t.Parallel()
603+
604+
given, when, then := NewRunTestStage(t)
605+
606+
given.
607+
a_timer_is_started().
608+
a_rate_of("1/1s").and().
609+
a_duration_of(500 * time.Millisecond).and().
610+
a_scenario_where_each_iteration_takes(3 * time.Second).and().
611+
wait_for_completion_timeout_of(1 * time.Second).and().
612+
a_distribution_type("none")
613+
614+
when.
615+
the_run_command_is_executed()
616+
617+
then.
618+
setup_teardown_is_called_within(600*time.Millisecond + 1*time.Second).and().
619+
metrics_are_pushed_to_prometheus().and().
620+
expect_the_stdout_output_to_include([]string{
621+
"Max Duration Elapsed - waiting for active tests to complete",
622+
"Active tests not completed after 1s. Stopping...",
623+
})
624+
625+
// needed for goroutine checker
626+
// sleep is the time left in the interation duration after the timeout
627+
time.Sleep(3 * time.Second)
567628
}
568629

569630
func TestFinalRunMetrics(t *testing.T) {

internal/run/run_stage_test.go

+60-50
Original file line numberDiff line numberDiff line change
@@ -68,58 +68,60 @@ type (
6868
)
6969

7070
type RunTestStage struct {
71-
startTime time.Time
72-
metrics *metrics.Metrics
73-
output *ui.Output
74-
runInstance *run.Run
75-
runResult *run.Result
76-
t *testing.T
77-
require *require.Assertions
78-
metricData *MetricData
79-
scenarioCleanup func()
80-
assert *assert.Assertions
81-
iterationCleanup func()
82-
f1 *f1.F1
83-
durations sync.Map
84-
frequency string
85-
rate string
86-
stages string
87-
distributionType string
88-
configFile string
89-
startRate string
90-
endRate string
91-
rampDuration string
92-
scenario string
93-
settings envsettings.Settings
94-
maxFailures uint64
95-
maxIterations uint64
96-
maxFailuresRate int
97-
duration time.Duration
98-
concurrency int
99-
triggerType TriggerType
100-
iterationTeardownCount atomic.Uint32
101-
setupTeardownCount atomic.Uint32
102-
runCount atomic.Uint32
103-
stdout syncWriter
104-
stderr syncWriter
105-
interactive bool
106-
verbose bool
71+
startTime time.Time
72+
metrics *metrics.Metrics
73+
output *ui.Output
74+
runInstance *run.Run
75+
runResult *run.Result
76+
t *testing.T
77+
require *require.Assertions
78+
metricData *MetricData
79+
scenarioCleanup func()
80+
assert *assert.Assertions
81+
iterationCleanup func()
82+
f1 *f1.F1
83+
durations sync.Map
84+
frequency string
85+
rate string
86+
stages string
87+
distributionType string
88+
configFile string
89+
startRate string
90+
endRate string
91+
rampDuration string
92+
scenario string
93+
settings envsettings.Settings
94+
maxFailures uint64
95+
maxIterations uint64
96+
maxFailuresRate int
97+
duration time.Duration
98+
waitForCompletionTimeout time.Duration
99+
concurrency int
100+
triggerType TriggerType
101+
iterationTeardownCount atomic.Uint32
102+
setupTeardownCount atomic.Uint32
103+
runCount atomic.Uint32
104+
stdout syncWriter
105+
stderr syncWriter
106+
interactive bool
107+
verbose bool
107108
}
108109

109110
func NewRunTestStage(t *testing.T) (*RunTestStage, *RunTestStage, *RunTestStage) {
110111
t.Helper()
111112
stage := &RunTestStage{
112-
t: t,
113-
concurrency: 100,
114-
assert: assert.New(t),
115-
require: require.New(t),
116-
f1: f1.New(),
117-
settings: envsettings.Get(),
118-
metricData: NewMetricData(),
119-
output: ui.NewDiscardOutput(),
120-
metrics: metrics.NewInstance(prometheus.NewRegistry(), true),
121-
stdout: syncWriter{writer: &bytes.Buffer{}},
122-
stderr: syncWriter{writer: &bytes.Buffer{}},
113+
t: t,
114+
concurrency: 100,
115+
assert: assert.New(t),
116+
require: require.New(t),
117+
f1: f1.New(),
118+
settings: envsettings.Get(),
119+
metricData: NewMetricData(),
120+
output: ui.NewDiscardOutput(),
121+
metrics: metrics.NewInstance(prometheus.NewRegistry(), true),
122+
stdout: syncWriter{writer: &bytes.Buffer{}},
123+
stderr: syncWriter{writer: &bytes.Buffer{}},
124+
waitForCompletionTimeout: 5 * time.Second,
123125
}
124126

125127
handler := FakePrometheusHandler(t, stage.metricData)
@@ -141,6 +143,11 @@ func (s *RunTestStage) a_rate_of(rate string) *RunTestStage {
141143
return s
142144
}
143145

146+
func (s *RunTestStage) wait_for_completion_timeout_of(timeout time.Duration) *RunTestStage {
147+
s.waitForCompletionTimeout = timeout
148+
return s
149+
}
150+
144151
func (s *RunTestStage) and() *RunTestStage {
145152
return s
146153
}
@@ -198,7 +205,7 @@ func (s *RunTestStage) setupRun() {
198205
MaxFailures: s.maxFailures,
199206
MaxFailuresRate: s.maxFailuresRate,
200207
Verbose: s.verbose,
201-
}, s.f1.GetScenarios(), s.build_trigger(), s.settings, s.metrics, outputer)
208+
}, s.f1.GetScenarios(), s.build_trigger(), s.waitForCompletionTimeout, s.settings, s.metrics, outputer)
202209

203210
s.require.NoError(err)
204211
s.runInstance = r
@@ -218,8 +225,11 @@ func (s *RunTestStage) the_run_command_is_executed_and_cancelled_after(duration
218225
s.setupRun()
219226

220227
var err error
221-
ctx, cancel := context.WithTimeout(context.TODO(), duration)
222-
defer cancel()
228+
ctx, cancel := context.WithCancel(context.TODO())
229+
go func() {
230+
<-time.After(duration)
231+
cancel()
232+
}()
223233

224234
s.runResult, err = s.runInstance.Do(ctx)
225235
s.require.NoError(err)

internal/run/test_runner.go

+40-22
Original file line numberDiff line numberDiff line change
@@ -30,22 +30,24 @@ const (
3030
)
3131

3232
type Run struct {
33-
pusher *push.Pusher
34-
progressRunner *raterun.Runner
35-
metrics *metrics.Metrics
36-
views *views.Views
37-
activeScenario *workers.ActiveScenario
38-
trigger *api.Trigger
39-
output *ui.Output
40-
scenarioLogger *ScenarioLogger
41-
result *Result
42-
options options.RunOptions
33+
pusher *push.Pusher
34+
progressRunner *raterun.Runner
35+
metrics *metrics.Metrics
36+
views *views.Views
37+
activeScenario *workers.ActiveScenario
38+
trigger *api.Trigger
39+
output *ui.Output
40+
scenarioLogger *ScenarioLogger
41+
result *Result
42+
options options.RunOptions
43+
waitForCompletionTimeout time.Duration
4344
}
4445

4546
func NewRun(
4647
options options.RunOptions,
4748
scenarios *scenarios.Scenarios,
4849
trigger *api.Trigger,
50+
waitForCompletionTimeout time.Duration,
4951
settings envsettings.Settings,
5052
metricsInstance *metrics.Metrics,
5153
parentOutput *ui.Output,
@@ -91,16 +93,17 @@ func NewRun(
9193
pusher := newMetricsPusher(settings, scenario.Name, metricsInstance)
9294

9395
return &Run{
94-
options: options,
95-
trigger: trigger,
96-
metrics: metricsInstance,
97-
views: viewsInstance,
98-
result: result,
99-
pusher: pusher,
100-
output: outputer,
101-
progressRunner: progressRunner,
102-
activeScenario: activeScenario,
103-
scenarioLogger: scenarioLogger,
96+
options: options,
97+
trigger: trigger,
98+
metrics: metricsInstance,
99+
views: viewsInstance,
100+
result: result,
101+
pusher: pusher,
102+
output: outputer,
103+
progressRunner: progressRunner,
104+
activeScenario: activeScenario,
105+
scenarioLogger: scenarioLogger,
106+
waitForCompletionTimeout: waitForCompletionTimeout,
104107
}, nil
105108
}
106109

@@ -254,12 +257,27 @@ func (r *Run) run(ctx context.Context) {
254257
case <-ctx.Done():
255258
r.output.Display(r.result.Interrupted())
256259
r.progressRunner.Restart()
257-
<-poolManager.WaitForCompletion()
260+
select {
261+
case <-poolManager.WaitForCompletion():
262+
case <-time.After(r.waitForCompletionTimeout):
263+
r.output.Display(ui.WarningMessage{
264+
Message: fmt.Sprintf("Active tests not completed after %s. Stopping...", r.waitForCompletionTimeout.String()),
265+
})
266+
}
267+
258268
case <-triggerCtx.Done():
259269
if triggerCtx.Err() == context.DeadlineExceeded {
260270
r.output.Display(r.result.MaxDurationElapsed())
271+
} else {
272+
r.output.Display(r.result.Interrupted())
273+
}
274+
select {
275+
case <-poolManager.WaitForCompletion():
276+
case <-time.After(r.waitForCompletionTimeout):
277+
r.output.Display(ui.WarningMessage{
278+
Message: fmt.Sprintf("Active tests not completed after %s. Stopping...", r.waitForCompletionTimeout.String()),
279+
})
261280
}
262-
<-poolManager.WaitForCompletion()
263281
case <-poolManager.WaitForCompletion():
264282
if poolManager.MaxIterationsReached() {
265283
r.output.Display(r.result.MaxIterationsReached())

internal/ui/printer.go

+1-9
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,6 @@ func (t *Printer) Error(a ...any) {
3434
fmt.Fprintln(t.ErrWriter, a...)
3535
}
3636

37-
func (t *Printer) Printf(format string, a ...any) {
38-
fmt.Fprintf(t.Writer, format, a...)
39-
}
40-
4137
func (t *Printer) Warn(a ...any) {
42-
fmt.Fprint(t.ErrWriter, a...)
43-
}
44-
45-
func (t *Printer) Warnf(format string, a ...any) {
46-
fmt.Fprintf(t.ErrWriter, format, a...)
38+
fmt.Fprintln(t.ErrWriter, a...)
4739
}

0 commit comments

Comments
 (0)