Skip to content

Commit d257b88

Browse files
Fix SR anomaly score calculation at beginning (dotnet#5502)
* adjust expected value * update boundary calculation * fix boundary * adjust default values * fix percent case * fix error in anomaly score calculation * adjust score calculation for first & second points * fix SR not reporting anomalies at the beginning * fix an issue in batch processing * remove an unused parameter Co-authored-by: [email protected] <[email protected]>
1 parent 652abaa commit d257b88

File tree

3 files changed

+123
-3
lines changed

3 files changed

+123
-3
lines changed

Diff for: src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs

+26-3
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,15 @@ public void Process()
309309
_previousBatch = _previousBatch.GetRange(_batch.Count, _bLen);
310310
_previousBatch.AddRange(_batch);
311311
_modeler.Train(_previousBatch.ToArray(), ref _results);
312+
313+
// move the values to front
314+
for (int i = 0; i < _batch.Count; ++i)
315+
{
316+
for (int j = 0; j < _outputLength; ++j)
317+
{
318+
_results[i][j] = _results[_bLen + i][j];
319+
}
320+
}
312321
}
313322
else
314323
{
@@ -334,7 +343,7 @@ public ValueGetter<VBuffer<double>> CreateGetter(DataViewRowCursor input, string
334343
double src = default;
335344
srcGetter(ref src);
336345
var result = VBufferEditor.Create(ref dst, _outputLength);
337-
_results[input.Position % _batchSize + _bLen].CopyTo(result.Values);
346+
_results[input.Position % _batchSize].CopyTo(result.Values);
338347
dst = result.Commit();
339348
};
340349
return getter;
@@ -351,6 +360,15 @@ internal sealed class SrCnnEntireModeler
351360
private static readonly double _deanomalyThreshold = 0.35;
352361
private static readonly double _boundSensitivity = 93.0;
353362
private static readonly double _unitForZero = 0.3;
363+
private static readonly double _minimumScore = 0.0;
364+
private static readonly double _maximumScore = 1.0;
365+
// If the score window is smaller than this value, the anomaly score tends to be small.
366+
// Proof: For each point, the SR anomaly score is calculated as (w is average window size):
367+
// (mag - avg_mag) / avg_mag
368+
// = max (w * mag_{a} - sum_{i=0 to w-1} mag_{a - i}) / sum_{i=0 to w-1} mag_{a - i}
369+
// = max ((w - 1) * mag_{a} - C) / (mag_{a} + C), where C = sum_{i=1 to w-1} mag_{a - i} >= 0
370+
// <= w - 1
371+
private static readonly int _minimumScoreWindowSize = (int)(_maximumScore * 10) + 1;
354372

355373
// pseudo-code to generate the factors.
356374
// factors = []
@@ -577,15 +595,20 @@ private void SpectralResidual(double[] values, double[][] results, double thresh
577595
{
578596
_ifftMagList[i] = Math.Sqrt(_ifftRe[i] * _ifftRe[i] + _ifftIm[i] * _ifftIm[i]);
579597
}
598+
580599
AverageFilter(_ifftMagList, Math.Min(_ifftMagList.Length, _judgementWindowSize));
600+
for (int i = 0; i <= Math.Min(length, _minimumScoreWindowSize); ++i)
601+
{
602+
_cumSumList[i] = _cumSumList[Math.Min(length, _minimumScoreWindowSize) - 1];
603+
}
581604

582605
// Step 7: Calculate raw score and set result
583606
for (int i = 0; i < results.GetLength(0); ++i)
584607
{
585608
var score = CalculateScore(_ifftMagList[i], _cumSumList[i]);
586609
score /= 10.0f;
587-
score = Math.Min(score, 1);
588-
score = Math.Max(score, 0);
610+
score = Math.Min(score, _maximumScore);
611+
score = Math.Max(score, _minimumScore);
589612

590613
var detres = score > threshold ? 1 : 0;
591614

Diff for: test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs

+58
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,64 @@ public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData(
776776
}
777777
}
778778

779+
[Theory, CombinatorialData]
780+
public void TestSrCnnAnomalyDetectorWithAnomalyAtBeginning(
781+
[CombinatorialValues(SrCnnDeseasonalityMode.Stl, SrCnnDeseasonalityMode.Mean, SrCnnDeseasonalityMode.Median)] SrCnnDeseasonalityMode mode
782+
)
783+
{
784+
var ml = new MLContext(1);
785+
IDataView dataView;
786+
List<TimeSeriesDataDouble> data;
787+
788+
var dataPath = GetDataPath("Timeseries", "anomaly_at_beginning.csv");
789+
790+
// Load data from file into the dataView
791+
dataView = ml.Data.LoadFromTextFile<TimeSeriesDataDouble>(dataPath, hasHeader: true);
792+
data = ml.Data.CreateEnumerable<TimeSeriesDataDouble>(dataView, reuseRowObject: false).ToList();
793+
794+
// Setup the detection arguments
795+
string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction);
796+
string inputColumnName = nameof(TimeSeriesDataDouble.Value);
797+
798+
// Do batch anomaly detection
799+
var options = new SrCnnEntireAnomalyDetectorOptions()
800+
{
801+
Threshold = 0.30,
802+
BatchSize = -1,
803+
Sensitivity = 80.0,
804+
DetectMode = SrCnnDetectMode.AnomalyAndMargin,
805+
Period = 0,
806+
DeseasonalityMode = mode
807+
};
808+
809+
var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, options);
810+
811+
// Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection.
812+
var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>(
813+
outputDataView, reuseRowObject: false);
814+
815+
var anomalyIndex = 1;
816+
817+
int k = 0;
818+
foreach (var prediction in predictionColumn)
819+
{
820+
Assert.Equal(7, prediction.Prediction.Length);
821+
if (anomalyIndex == k)
822+
{
823+
Assert.Equal(1, prediction.Prediction[0]);
824+
Assert.True(prediction.Prediction[6] > data[k].Value || data[k].Value > prediction.Prediction[5]);
825+
}
826+
else
827+
{
828+
Assert.Equal(0, prediction.Prediction[0]);
829+
Assert.True(prediction.Prediction[6] <= data[k].Value);
830+
Assert.True(data[k].Value <= prediction.Prediction[5]);
831+
}
832+
833+
++k;
834+
}
835+
}
836+
779837
[Theory, CombinatorialData]
780838
public void TestSrcnnEntireDetectNonnegativeData(
781839
[CombinatorialValues(true, false)] bool isPositive)

Diff for: test/data/Timeseries/anomaly_at_beginning.csv

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
Value
2+
181.944
3+
37.176
4+
57.14
5+
67.128
6+
72.12
7+
77.112
8+
82.104
9+
83.1
10+
87.09
11+
92.088
12+
92.01
13+
97.08
14+
102.072
15+
107.05
16+
107.06
17+
117.048
18+
122.04
19+
132.024
20+
147
21+
151.82
22+
151.992
23+
151.72
24+
151.94
25+
156.969
26+
156.984
27+
156.92
28+
161.976
29+
161.94
30+
161.97
31+
166.968
32+
176.952
33+
181.94
34+
186.936
35+
201.91
36+
201.912
37+
201.9
38+
206.904
39+
216.88

0 commit comments

Comments
 (0)