Skip to content

Commit 1b3a72d

Browse files
authored
Merge pull request #984 from Project-MONAI/nds-stats-fix
adding failure reason to executionStats
2 parents 790f4ac + ee869d7 commit 1b3a72d

File tree

7 files changed

+160
-10
lines changed

7 files changed

+160
-10
lines changed

src/WorkflowManager/Contracts/Migrations/M001_ExecutionStats_addVersion.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ namespace Monai.Deploy.WorkflowManager.Common.Contracts.Migrations
2222
{
2323
public class M001_ExecutionStats_addVersion : DocumentMigration<ExecutionStats>
2424
{
25-
public M001_ExecutionStats_addVersion() : base("1.0.0") { }
25+
public M001_ExecutionStats_addVersion() : base("1.0.1") { }
2626

2727
public override void Up(BsonDocument document)
2828
{

src/WorkflowManager/Contracts/Migrations/M002_ExecutionStats_addWorkflowId.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ namespace Monai.Deploy.WorkflowManager.Common.Contracts.Migrations
2222
{
2323
public class M002_ExecutionStats_addWorkflowId : DocumentMigration<ExecutionStats>
2424
{
25-
public M002_ExecutionStats_addWorkflowId() : base("1.0.1") { }
25+
public M002_ExecutionStats_addWorkflowId() : base("1.0.2") { }
2626

2727
public override void Up(BsonDocument document)
2828
{
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* Copyright 2022 MONAI Consortium
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
using Monai.Deploy.WorkflowManager.Common.Contracts.Models;
18+
using Mongo.Migration.Migrations.Document;
19+
using MongoDB.Bson;
20+
21+
namespace Monai.Deploy.WorkflowManager.Common.Contracts.Migrations
22+
{
23+
/// <summary>
/// Document migration for <see cref="ExecutionStats"/> registered under version "1.0.3".
/// </summary>
public class M003_ExecutionStats_addFailureReason : DocumentMigration<ExecutionStats>
{
    /// <summary>
    /// Registers this migration under version "1.0.3".
    /// </summary>
    public M003_ExecutionStats_addFailureReason()
        : base("1.0.3")
    {
    }

    /// <summary>
    /// Upgrade step. Makes no change to the raw document.
    /// </summary>
    /// <param name="document">The stored document being upgraded.</param>
    public override void Up(BsonDocument document)
    {
        // Intentionally empty: running the migration is meant to get every
        // object re-saved so that it carries a reason.
    }

    /// <summary>
    /// Downgrade step. Strips the "Reason" element from the document.
    /// </summary>
    /// <param name="document">The stored document being downgraded.</param>
    public override void Down(BsonDocument document)
    {
        try
        {
            document.Remove("Reason");
        }
        catch
        {
            // Deliberately swallowed: a failed downgrade must not stop startup.
        }
    }
}
42+
}

src/WorkflowManager/Contracts/Models/ExecutionStats.cs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
using System;
1818
using System.ComponentModel.DataAnnotations;
1919
using Monai.Deploy.WorkflowManager.Common.Contracts.Migrations;
20-
using Ardalis.GuardClauses;
2120
using Monai.Deploy.Messaging.Events;
2221
using Mongo.Migration.Documents;
2322
using Mongo.Migration.Documents.Attributes;
@@ -26,7 +25,7 @@
2625

2726
namespace Monai.Deploy.WorkflowManager.Common.Contracts.Models
2827
{
29-
[CollectionLocation("ExecutionStats"), RuntimeVersion("1.0.1")]
28+
[CollectionLocation("ExecutionStats"), RuntimeVersion("1.0.3")]
3029
public class ExecutionStats : IDocument
3130
{
3231
/// <summary>
@@ -40,7 +39,7 @@ public class ExecutionStats : IDocument
4039
/// Gets or sets Db version.
4140
/// </summary>
4241
[JsonConverter(typeof(DocumentVersionConvert)), BsonSerializer(typeof(DocumentVersionConverBson))]
43-
public DocumentVersion Version { get; set; } = new DocumentVersion(1, 0, 1);
42+
// Default kept in step with RuntimeVersion("1.0.3") on this class and with the
// newest migration (M003, registered as "1.0.3").
// NOTE(review): assumes new documents should start at the runtime version - confirm against Mongo.Migration behaviour.
public DocumentVersion Version { get; set; } = new DocumentVersion(1, 0, 3);
4443

4544
/// <summary>
4645
/// the correlationId of the event
@@ -110,6 +109,12 @@ public class ExecutionStats : IDocument
110109
[JsonProperty(PropertyName = "status")]
111110
public string Status { get; set; } = TaskExecutionStatus.Created.ToString();
112111

112+
/// <summary>
113+
/// Gets or sets the failure reason.
114+
/// </summary>
115+
[JsonProperty(PropertyName = "reason")]
116+
public FailureReason Reason { get; set; }
117+
113118
/// <summary>
114119
/// Gets or sets the duration, difference between startedAt and CompletedAt time.
115120
/// </summary>
@@ -134,6 +139,7 @@ public ExecutionStats(TaskExecution execution, string workflowId, string correla
134139
StartedUTC = execution.TaskStartTime.ToUniversalTime();
135140
Status = execution.Status.ToString();
136141
WorkflowId = workflowId;
142+
Reason = execution.Reason;
137143
}
138144

139145
public ExecutionStats(TaskUpdateEvent taskUpdateEvent, string workflowId)
@@ -145,6 +151,7 @@ public ExecutionStats(TaskUpdateEvent taskUpdateEvent, string workflowId)
145151
TaskId = taskUpdateEvent.TaskId;
146152
Status = taskUpdateEvent.Status.ToString();
147153
WorkflowId = workflowId;
154+
Reason = taskUpdateEvent.Reason;
148155
}
149156

150157
public ExecutionStats(TaskCancellationEvent taskCanceledEvent, string workflowId, string correlationId)
@@ -156,6 +163,7 @@ public ExecutionStats(TaskCancellationEvent taskCanceledEvent, string workflowId
156163
TaskId = taskCanceledEvent.TaskId;
157164
Status = TaskExecutionStatus.Failed.ToString();
158165
WorkflowId = workflowId;
166+
Reason = taskCanceledEvent.Reason;
159167
}
160168
}
161169
}

src/WorkflowManager/Database/Repositories/TaskExecutionStatsRepository.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ await _taskExecutionStatsCollection.UpdateOneAsync(o =>
110110
.Set(w => w.CompletedAtUTC, updateMe.CompletedAtUTC)
111111
.Set(w => w.ExecutionTimeSeconds, updateMe.ExecutionTimeSeconds)
112112
.Set(w => w.DurationSeconds, duration)
113+
.Set(w => w.Reason, taskUpdateEvent.Reason)
113114

114115
, new UpdateOptions { IsUpsert = true }).ConfigureAwait(false);
115116
}
@@ -132,6 +133,7 @@ await _taskExecutionStatsCollection.UpdateOneAsync(o =>
132133
o.ExecutionId == updateMe.ExecutionId,
133134
Builders<ExecutionStats>.Update
134135
.Set(w => w.Status, updateMe.Status)
136+
.Set(w => w.Reason, taskCanceledEvent.Reason)
135137
.Set(w => w.LastUpdatedUTC, DateTime.UtcNow)
136138
.Set(w => w.CompletedAtUTC, updateMe.CompletedAtUTC)
137139
.Set(w => w.DurationSeconds, duration)

src/WorkflowManager/WorkflowManager/Controllers/TaskStatsController.cs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,13 +136,15 @@ public async Task<IActionResult> GetDailyStatsAsync([FromQuery] TimeFilter filte
136136
{
137137
Date = DateOnly.FromDateTime(g.Key.Date),
138138
TotalExecutions = g.Count(),
139-
TotalFailures = g.Count(i => string.Compare(i.Status, "Failed", true) == 0),
140-
TotalApprovals = g.Count(i => string.Compare(i.Status, ApplicationReviewStatus.Approved.ToString(), true) == 0),
141-
TotalRejections = g.Count(i => string.Compare(i.Status, ApplicationReviewStatus.Rejected.ToString(), true) == 0),
142-
TotalCancelled = g.Count(i => string.Compare(i.Status, ApplicationReviewStatus.Cancelled.ToString(), true) == 0),
139+
TotalFailures = g.Count(i => string.Compare(i.Status, "Failed", true) == 0 && i.Reason != FailureReason.TimedOut && i.Reason != FailureReason.Rejected),
140+
TotalApprovals = g.Count(i => string.Compare(i.Status, "Succeeded", true) == 0 && i.Reason == FailureReason.None),
141+
TotalRejections = g.Count(i => string.Compare(i.Status, "Failed", true) == 0 && i.Reason == FailureReason.Rejected),
142+
TotalCancelled = g.Count(i => string.Compare(i.Status, "Failed", true) == 0 && i.Reason == FailureReason.TimedOut),
143143
TotalAwaitingReview = g.Count(i => string.Compare(i.Status, ApplicationReviewStatus.AwaitingReview.ToString(), true) == 0),
144144
});
145145

146+
147+
146148
var pagedStats = statsDto.Skip((filter.PageNumber - 1) * pageSize).Take(pageSize);
147149

148150
var res = CreateStatsPagedResponse(pagedStats, validFilter, statsDto.Count(), _uriService, route);
@@ -152,7 +154,7 @@ public async Task<IActionResult> GetDailyStatsAsync([FromQuery] TimeFilter filte
152154
res.PeriodEnd = filter.EndTime;
153155
res.TotalExecutions = allStats.Count();
154156
res.TotalSucceeded = statsDto.Sum(s => s.TotalApprovals);
155-
res.TotalFailures = statsDto.Sum(s => s.TotalFailures);
157+
res.TotalFailures = statsDto.Sum(s => s.TotalFailures + s.TotalCancelled + s.TotalRejections);
156158
res.TotalInprogress = statsDto.Sum(s => s.TotalAwaitingReview);
157159
res.AverageTotalExecutionSeconds = Math.Round(avgTotalExecution, 2);
158160
res.AverageArgoExecutionSeconds = Math.Round(avgArgoExecution, 2);

tests/UnitTests/WorkflowManager.Tests/Controllers/TaskExecutionStatsControllerTests.cs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,102 @@ public async Task GetAllStatsAsync_Pass_All_Arguments_To_GetStatsAsync_In_Repo()
336336
It.Is<string>(s => s.Equals("")))
337337
);
338338
}
339+
340+
/// <summary>
/// One "Failed" execution whose reason is Rejected must show up in the daily
/// stats as a rejection and must not be counted as a failure.
/// </summary>
[Fact]
public async Task GetAllStatsAsync_Get_Correct_Reject_Count()
{
    const int pageNumber = 1;
    const int pageSize = 10;
    var startTime = new DateTime(2023, 4, 4);
    var endTime = new DateTime(2023, 4, 5);

    // Single fixture record returned by the mocked repository.
    var rejectedExecution = new ExecutionStats
    {
        ExecutionId = Guid.NewGuid().ToString(),
        StartedUTC = _startTime,
        WorkflowInstanceId = "workflow",
        TaskId = "task",
        Status = "Failed",
        Reason = Messaging.Events.FailureReason.Rejected,
    };
    var executionStats = new ExecutionStats[] { rejectedExecution };

    _repo
        .Setup(w => w.GetAllStatsAsync(It.IsAny<DateTime>(), It.IsAny<DateTime>(), It.IsAny<string>(), It.IsAny<string>()))
        .ReturnsAsync(executionStats);

    var filter = new TimeFilter
    {
        StartTime = startTime,
        EndTime = endTime,
        PageNumber = pageNumber,
        PageSize = pageSize,
    };
    var result = await StatsController.GetDailyStatsAsync(filter, "workflow");

    var resultCollection = result.As<OkObjectResult>().Value.As<StatsPagedResponse<IEnumerable<ExecutionStatDayOverview>>>().Data;
    var firstDay = resultCollection.First();

    Assert.Equal(1, firstDay.TotalExecutions);
    Assert.Equal(1, firstDay.TotalRejections);
    Assert.Equal(0, firstDay.TotalFailures);
}
371+
372+
/// <summary>
/// One "Failed" execution whose reason is TimedOut must show up in the daily
/// stats as cancelled and must not be counted as a failure.
/// </summary>
[Fact]
public async Task GetAllStatsAsync_Get_Correct_Canceled_Count()
{
    const int pageNumber = 1;
    const int pageSize = 10;
    var startTime = new DateTime(2023, 4, 4);
    var endTime = new DateTime(2023, 4, 5);

    // Single fixture record returned by the mocked repository.
    var timedOutExecution = new ExecutionStats
    {
        ExecutionId = Guid.NewGuid().ToString(),
        StartedUTC = _startTime,
        WorkflowInstanceId = "workflow",
        TaskId = "task",
        Status = "Failed",
        Reason = Messaging.Events.FailureReason.TimedOut,
    };
    var executionStats = new ExecutionStats[] { timedOutExecution };

    _repo
        .Setup(w => w.GetAllStatsAsync(It.IsAny<DateTime>(), It.IsAny<DateTime>(), It.IsAny<string>(), It.IsAny<string>()))
        .ReturnsAsync(executionStats);

    var filter = new TimeFilter
    {
        StartTime = startTime,
        EndTime = endTime,
        PageNumber = pageNumber,
        PageSize = pageSize,
    };
    var result = await StatsController.GetDailyStatsAsync(filter, "workflow");

    var resultCollection = result.As<OkObjectResult>().Value.As<StatsPagedResponse<IEnumerable<ExecutionStatDayOverview>>>().Data;
    var firstDay = resultCollection.First();

    Assert.Equal(1, firstDay.TotalExecutions);
    Assert.Equal(1, firstDay.TotalCancelled);
    Assert.Equal(0, firstDay.TotalFailures);
}
403+
404+
/// <summary>
/// One "Succeeded" execution with reason None must show up in the daily
/// stats as an approval and must not be counted as a failure.
/// </summary>
[Fact]
public async Task GetAllStatsAsync_Get_Correct_Accepted_Count()
{
    const int pageNumber = 1;
    const int pageSize = 10;
    var startTime = new DateTime(2023, 4, 4);
    var endTime = new DateTime(2023, 4, 5);

    // Single fixture record returned by the mocked repository.
    var succeededExecution = new ExecutionStats
    {
        ExecutionId = Guid.NewGuid().ToString(),
        StartedUTC = _startTime,
        WorkflowInstanceId = "workflow",
        TaskId = "task",
        Status = "Succeeded",
        Reason = Messaging.Events.FailureReason.None,
    };
    var executionStats = new ExecutionStats[] { succeededExecution };

    _repo
        .Setup(w => w.GetAllStatsAsync(It.IsAny<DateTime>(), It.IsAny<DateTime>(), It.IsAny<string>(), It.IsAny<string>()))
        .ReturnsAsync(executionStats);

    var filter = new TimeFilter
    {
        StartTime = startTime,
        EndTime = endTime,
        PageNumber = pageNumber,
        PageSize = pageSize,
    };
    var result = await StatsController.GetDailyStatsAsync(filter, "workflow");

    var resultCollection = result.As<OkObjectResult>().Value.As<StatsPagedResponse<IEnumerable<ExecutionStatDayOverview>>>().Data;
    var firstDay = resultCollection.First();

    Assert.Equal(1, firstDay.TotalExecutions);
    Assert.Equal(1, firstDay.TotalApprovals);
    Assert.Equal(0, firstDay.TotalFailures);
}
339435
}
340436
#pragma warning restore CS8604 // Possible null reference argument.
341437
#pragma warning restore CS8602 // Dereference of a possibly null reference.

0 commit comments

Comments
 (0)