Skip to content

Commit a592661

Browse files
committed
Add Tests and Key Files
1 parent 386e8f8 commit a592661

6 files changed

+268
-0
lines changed

ecl/ClassificationTestModified.ecl

+97
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*##############################################################################
2+
3+
HPCC SYSTEMS software Copyright (C) 2022 HPCC Systems®.
4+
5+
Licensed under the Apache License, Version 2.0 (the "License");
6+
you may not use this file except in compliance with the License.
7+
You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
############################################################################## */
17+
18+
#ONWARNING(2007, ignore);
19+
#ONWARNING(4531, ignore);
20+
#ONWARNING(4550, ignore);
21+
22+
// Modified version of the testCovTypeClass test file that works with the
23+
// OBT test system
24+
25+
IMPORT $.^.test.datasets.CovTypeDS;
26+
IMPORT $.^ AS LT;
27+
IMPORT LT.LT_Types;
28+
IMPORT ML_Core;
29+
IMPORT ML_Core.Types;
30+
31+
// Settings for the classification test. Forest parameters are passed to
// LT.ClassificationForest inside ClassTest.
numTrees := 100; // Number of trees passed to the forest
32+
maxDepth := 255; // Depth limit passed to the forest
33+
numFeatures := 0; // Zero is automatic choice
34+
balanceClasses := FALSE; // Passed as the last argument to LT.ClassificationForest
35+
nonSequentialIds := TRUE; // True to renumber ids, numbers and work-items to test
36+
// support for non-sequentiality (each is multiplied by 5 when TRUE)
37+
numWIs := 2; // The number of independent work-items to create
38+
maxRecs := 5000; // Note that this has to be less than or equal to the number of records
39+
// in CovTypeDS (currently 5000)
40+
DependentVar := 52; // Field number selected as the dependent (class) variable
41+
42+
43+
DiscreteField := Types.DiscreteField; // Local alias for the ML_Core discrete field layout
44+
NumericField := Types.NumericField; // Local alias for the ML_Core numeric field layout
45+
trainDat := CovTypeDS.trainRecs; // Training records
46+
testDat := CovTypeDS.testRecs; // Test records
47+
nominalFields := CovTypeDS.nominalCols; // Passed to the forest as its nominal fields argument
48+
49+
// Trains a classification forest on the training records whose id is at most
// maxRecs and returns the result of Forest.Accuracy on the test records.
// Takes no arguments; every setting comes from the constants defined earlier
// in this file. Training and test data are both replicated into numWIs work items.
ClassTest() := FUNCTION
50+
ML_Core.ToField(trainDat, trainNF); // Get training data as a field
51+
ML_Core.ToField(testDat, testNF); // Get test data as a field
52+
53+
//Ind = independent, Dep = dependent
54+
Ind1 := PROJECT(trainNF(number != DependentVar AND id <= maxRecs), TRANSFORM(NumericField,
55+
SELF.number := IF(nonSequentialIds, 5*LEFT.number, LEFT.number), // optionally spread field numbers (x5)
56+
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id), // optionally spread record ids (x5)
57+
SELF := LEFT));
58+
Dep1 := PROJECT(trainNF(number = DependentVar AND id <= maxRecs), TRANSFORM(DiscreteField,
59+
SELF.number := 1, // the dependent field is renumbered to 1
60+
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id), // same id mapping as Ind1 so rows still line up
61+
SELF := LEFT));
62+
63+
// Generate multiple work items: each record is emitted numWIs times, once per work-item id
64+
Ind2 := NORMALIZE(Ind1, numWIs, TRANSFORM(RECORDOF(LEFT),
65+
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
66+
SELF := LEFT));
67+
Dep2 := NORMALIZE(Dep1, numWIs, TRANSFORM(RECORDOF(LEFT),
68+
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
69+
SELF := LEFT));
70+
71+
Forest := LT.ClassificationForest(numTrees, numFeatures, maxDepth, nominalFields, balanceClasses);
72+
model := Forest.GetModel(Ind2, Dep2); // train on the replicated training data
73+
74+
// Test data gets the same number/id mapping as the training data; it is not limited by maxRecs
IndTest1 := PROJECT(testNF(number != DependentVar), TRANSFORM(NumericField,
75+
SELF.number := IF(nonSequentialIds, 5*LEFT.number, LEFT.number),
76+
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id),
77+
SELF := LEFT));
78+
DepCmp1 := PROJECT(testNF(number = DependentVar), TRANSFORM(DiscreteField,
79+
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id),
80+
SELF := LEFT));
81+
82+
// Generate multiple work items
83+
IndTest2 := NORMALIZE(IndTest1, numWIs, TRANSFORM(RECORDOF(LEFT),
84+
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
85+
SELF := LEFT));
86+
DepCmp2 := NORMALIZE(DepCmp1, numWIs, TRANSFORM(RECORDOF(LEFT),
87+
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
88+
SELF.number := 1; // renumbered here (not in DepCmp1) to match the field number given to Dep1
89+
SELF := LEFT));
90+
91+
RETURN Forest.Accuracy(model, DepCmp2, IndTest2); // compare the model's results on IndTest2 with the known classes
92+
END;
93+
94+
accuracy := ClassTest();
95+
96+
// Every work item must be at least 78% accurate. The output is a single
// aggregated row: 'Pass' when the count of work items meeting the threshold
// equals numWIs. On failure the message reports the lowest work-item accuracy,
// so the value shown is always one that actually missed the threshold.
97+
OUTPUT(accuracy, {passing := IF((COUNT(GROUP, raw_accuracy >= 0.78) = numWIs), 'Pass', 'Fail: ' + MIN(GROUP, raw_accuracy) + ' < 0.78')}, NAMED('Result'));

ecl/CommonPrefixTest2.ecl

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*##############################################################################
2+
3+
HPCC SYSTEMS software Copyright (C) 2022 HPCC Systems®.
4+
5+
Licensed under the Apache License, Version 2.0 (the "License");
6+
you may not use this file except in compliance with the License.
7+
You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
############################################################################## */
17+
18+
// Updated version of CommonPrefixLenTest that outputs whether the correct
19+
// output is reached or what any differences were, plus additional inputs
20+
21+
IMPORT $.^ AS LT;
22+
IMPORT LT.Internal AS int;
23+
24+
// Input sets used by the test cases below
inp1 := [1, 2, 3, 4, 5];
25+
inp2 := [1, 2, 4, 5, 6, 7];
26+
inp3 := [1, 2, 3, 4, 5, 6, 7];
27+
inp4 := [2, 3, 4, 5];
28+
inp5 := [7, 9, 13, 20];
29+
inp6 := [7, 9, 13, 20]; // Deliberately identical to inp5
30+
31+
// Actual results, named Res<a>x<b> for CommonPrefixLen(inp<a>, inp<b>)
Res1x2 := int.CommonPrefixLen(inp1, inp2);
32+
Res1x3 := int.CommonPrefixLen(inp1, inp3); // inp1 is a complete prefix of inp3
33+
Res2x3 := int.CommonPrefixLen(inp2, inp3);
34+
Res3x2 := int.CommonPrefixLen(inp3, inp2); // Test that function is symmetric as f(2, 3) should equal f(3, 2)
35+
Res3x4 := int.CommonPrefixLen(inp3, inp4); // First elements differ
36+
Res5x6 := int.CommonPrefixLen(inp5, inp6); // Equal sets, should return the length
37+
38+
// Expected results, one per test case above
Expected1x2 := 2;
39+
Expected1x3 := 5;
40+
Expected2x3 := 2;
41+
Expected3x2 := 2;
42+
Expected3x4 := 0;
43+
Expected5x6 := 4;
44+
45+
// One row per test case: its label, the expected value and the value actually returned
Test_Result := RECORD
46+
STRING Test;
47+
INTEGER Expected;
48+
INTEGER Result;
49+
END;
50+
51+
tests := DATASET([{'1x2', Expected1x2, Res1x2},
52+
{'1x3', Expected1x3, Res1x3},
53+
{'2x3', Expected2x3, Res2x3},
54+
{'3x2', Expected3x2, Res3x2},
55+
{'3x4', Expected3x4, Res3x4},
56+
{'5x6', Expected5x6, Res5x6}], Test_Result);
57+
58+
// 'Result' is a one-line summary; 'Errors' lists the mismatching cases (empty when all pass)
OUTPUT(IF(COUNT(tests(Expected != Result)) = 0, 'All Tests Passed', 'Test Cases Failed'), NAMED('Result'));
59+
OUTPUT(tests(Expected != Result), NAMED('Errors'));

ecl/RegressionTestModified.ecl

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/*##############################################################################
2+
3+
HPCC SYSTEMS software Copyright (C) 2022 HPCC Systems®.
4+
5+
Licensed under the Apache License, Version 2.0 (the "License");
6+
you may not use this file except in compliance with the License.
7+
You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
############################################################################## */
17+
18+
#ONWARNING(4550, ignore);
19+
20+
// Modified version of the testCovTypeReg test file that works with the
21+
// OBT test system
22+
23+
IMPORT $.^.test.datasets.CovTypeDS;
24+
IMPORT $.^ AS LT;
25+
IMPORT LT.LT_Types;
26+
IMPORT ML_Core;
27+
IMPORT ML_Core.Types;
28+
29+
// Settings for the regression test. Forest parameters are passed by name to
// LT.RegressionForest inside RegressTest.
numTrees := 400; // Passed as numTrees
30+
maxDepth := 255; // Passed as maxDepth
31+
numFeatures := 0; // Zero is automatic choice (passed as featuresPerNode)
32+
nonSequentialIds := TRUE; // True to renumber ids, numbers and work-items to test
33+
// support for non-sequentiality
34+
numWIs := 1; // The number of independent work-items to create
35+
maxRecs := 500; // Note that this has to be less than or equal to the number of records
36+
// in CovTypeDS (currently 500)
37+
38+
maxTestRecs := 100; // Width of the id range of test records that are scored
39+
NumericField := Types.NumericField; // Local alias for the ML_Core numeric field layout
40+
trainDat := CovTypeDS.trainRecs; // Training records
41+
testDat := CovTypeDS.testRecs; // Test records
42+
nominalFields := CovTypeDS.nominalCols; // Passed to the forest as nominalFields
43+
DependentVar := 1; // Field number selected as the dependent (target) variable
44+
45+
// Trains a regression forest on the training records whose id is at most
// maxRecs and returns the result of Forest.Accuracy on a limited range of test
// records. Takes no arguments; every setting comes from the constants defined
// earlier in this file.
RegressTest() := FUNCTION
46+
47+
ML_Core.ToField(trainDat, trainNF); // Get training data as a field
48+
ML_Core.ToField(testDat, testNF); // Get test data as a field
49+
50+
// Take out the first field from training set (Elevation) to use as the target value. Re-number the other fields
51+
// to fill the gap
52+
53+
//Ind = independent, Dep = dependent
54+
Ind1 := PROJECT(trainNF(number != DependentVar AND id <= maxRecs), TRANSFORM(NumericField,
55+
SELF.number := IF(nonSequentialIds, (5*LEFT.number -1), LEFT.number -1), // number-1 closes the gap; with nonSequentialIds it is 5*number-1 instead
56+
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id), // optionally spread record ids (x5)
57+
SELF := LEFT));
58+
Dep1 := PROJECT(trainNF(number = DependentVar AND id <= maxRecs), TRANSFORM(NumericField,
59+
SELF.number := DependentVar, // the filter already selects this number, so it is unchanged
60+
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id), // same id mapping as Ind1 so rows still line up
61+
SELF := LEFT));
62+
63+
// Generate multiple work items: each record is emitted numWIs times, once per work-item id
64+
Ind2 := NORMALIZE(Ind1, numWIs, TRANSFORM(RECORDOF(LEFT),
65+
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
66+
SELF := LEFT));
67+
Dep2 := NORMALIZE(Dep1, numWIs, TRANSFORM(RECORDOF(LEFT),
68+
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
69+
SELF := LEFT));
70+
71+
Forest := LT.RegressionForest(numTrees:=numTrees, featuresPerNode:=numFeatures, maxDepth:=maxDepth, nominalFields:=nominalFields);
72+
model := Forest.GetModel(Ind2, Dep2); // train on the replicated training data
73+
74+
maxTestId := MIN(testNF, id) + maxTestRecs; // lowest test id plus the cap
75+
testNF2 := testNF(id < maxTestId); // keep ids in [lowest id, lowest id + maxTestRecs)
76+
77+
Indtest1 := PROJECT(testNF2(number != DependentVar), TRANSFORM(NumericField, // same attribute as IndTest1 below (ECL names are case-insensitive)
78+
SELF.number := IF(nonSequentialIds, (5*LEFT.number -1), LEFT.number -1),
79+
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id),
80+
SELF := LEFT));
81+
DepCmp1 := PROJECT(testNF2(number = DependentVar), TRANSFORM(NumericField,
82+
SELF.number := DependentVar,
83+
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id),
84+
SELF := LEFT));
85+
86+
// Generate multiple work items
87+
IndTest2 := NORMALIZE(IndTest1, numWIs, TRANSFORM(RECORDOF(LEFT),
88+
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
89+
SELF := LEFT));
90+
DepCmp2 := NORMALIZE(DepCmp1, numWIs, TRANSFORM(RECORDOF(LEFT),
91+
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
92+
SELF := LEFT));
93+
94+
// Determine accuracy
95+
RETURN Forest.Accuracy(model, DepCmp2, IndTest2);
96+
END;
97+
98+
accuracy := RegressTest();
99+
100+
// The test passes only when the r2 field of the accuracy result is strictly
// greater than 0.70; otherwise the r2 value is included in the failure message.
101+
OUTPUT(accuracy, {passing := IF(r2 > 0.70, 'Pass', 'Fail, ' + r2)}, NAMED('Result'));

ecl/key/ClassificationTestModified.xml

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<Dataset name='Result'>
2+
<Row><passing>Pass</passing></Row>
3+
</Dataset>

ecl/key/CommonPrefixTest2.xml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<Dataset name='Result'>
2+
<Row><Result>All Tests Passed</Result></Row>
3+
</Dataset>
4+
<Dataset name='Errors'>
5+
</Dataset>

ecl/key/RegressionTestModified.xml

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<Dataset name='Result'>
2+
<Row><passing>Pass</passing></Row>
3+
</Dataset>

0 commit comments

Comments
 (0)