-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #12 from NSel1727/main
HPCC-27713 - Create confidence test and key file for the Learning Trees bundle Reviewed-By: Attila Vamos [email protected] Reviewed-by: Gavin Halliday <[email protected]> Merged-by: Gavin Halliday <[email protected]>
- Loading branch information
Showing
6 changed files
with
268 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
/*############################################################################## | ||
HPCC SYSTEMS software Copyright (C) 2022 HPCC Systems®. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
############################################################################## */ | ||
|
||
#ONWARNING(2007, ignore); | ||
#ONWARNING(4531, ignore); | ||
#ONWARNING(4550, ignore); | ||
|
||
// Modified version of the testCovTypeClass test file that works with the | ||
// OBT test system | ||
|
||
IMPORT $.^.test.datasets.CovTypeDS; | ||
IMPORT $.^ AS LT; | ||
IMPORT LT.LT_Types; | ||
IMPORT ML_Core; | ||
IMPORT ML_Core.Types; | ||
|
||
numTrees := 100; | ||
maxDepth := 255; | ||
numFeatures := 0; // Zero is automatic choice | ||
balanceClasses := FALSE; | ||
nonSequentialIds := TRUE; // True to renumber ids, numbers and work-items to test | ||
// support for non-sequentiality | ||
numWIs := 2; // The number of independent work-items to create | ||
maxRecs := 5000; // Note that this has to be less than or equal to the number of records | ||
// in CovTypeDS (currently 5000) | ||
DependentVar := 52; // Dependent Variable meant for this function | ||
|
||
|
||
DiscreteField := Types.DiscreteField; | ||
NumericField := Types.NumericField; | ||
trainDat := CovTypeDS.trainRecs; | ||
testDat := CovTypeDS.testRecs; | ||
nominalFields := CovTypeDS.nominalCols; | ||
|
||
// ClassTest: trains a classification forest on the CovTypeDS training records and | ||
// returns the result of Forest.Accuracy evaluated against the CovTypeDS test records. | ||
// Takes no parameters; it reads the file-level settings numTrees, numFeatures, maxDepth, | ||
// nominalFields, balanceClasses, nonSequentialIds, numWIs, maxRecs and DependentVar. | ||
ClassTest() := FUNCTION | ||
ML_Core.ToField(trainDat, trainNF); // Get training data as a field | ||
ML_Core.ToField(testDat, testNF); // Get test data as a field | ||
|
||
//Ind = independent, Dep = dependent | ||
// Training features: every field except DependentVar, restricted to ids <= maxRecs. | ||
// When nonSequentialIds is TRUE, field numbers and ids are multiplied by 5. | ||
Ind1 := PROJECT(trainNF(number != DependentVar AND id <= maxRecs), TRANSFORM(NumericField, | ||
SELF.number := IF(nonSequentialIds, 5*LEFT.number, LEFT.number), | ||
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id), | ||
SELF := LEFT)); | ||
// Training target: the DependentVar field as a DiscreteField, renumbered to field 1. | ||
// Ids receive the same optional x5 scaling as Ind1. | ||
Dep1 := PROJECT(trainNF(number = DependentVar AND id <= maxRecs), TRANSFORM(DiscreteField, | ||
SELF.number := 1, | ||
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id), | ||
SELF := LEFT)); | ||
|
||
// Generate multiple work items | ||
// Each record is emitted numWIs times; wi is COUNTER, or 5*COUNTER when | ||
// nonSequentialIds is TRUE. | ||
Ind2 := NORMALIZE(Ind1, numWIs, TRANSFORM(RECORDOF(LEFT), | ||
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER), | ||
SELF := LEFT)); | ||
Dep2 := NORMALIZE(Dep1, numWIs, TRANSFORM(RECORDOF(LEFT), | ||
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER), | ||
SELF := LEFT)); | ||
|
||
// Configure the forest from the file-level settings and train it on all work items. | ||
Forest := LT.ClassificationForest(numTrees, numFeatures, maxDepth, nominalFields, balanceClasses); | ||
model := Forest.GetModel(Ind2, Dep2); | ||
|
||
// Test features and expected classes, renumbered like the training data. | ||
// Unlike the training set, the test set is not limited by maxRecs. | ||
IndTest1 := PROJECT(testNF(number != DependentVar), TRANSFORM(NumericField, | ||
SELF.number := IF(nonSequentialIds, 5*LEFT.number, LEFT.number), | ||
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id), | ||
SELF := LEFT)); | ||
// DepCmp1 keeps the original field number here; it is reset to 1 in the NORMALIZE below. | ||
DepCmp1 := PROJECT(testNF(number = DependentVar), TRANSFORM(DiscreteField, | ||
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id), | ||
SELF := LEFT)); | ||
|
||
// Generate multiple work items | ||
IndTest2 := NORMALIZE(IndTest1, numWIs, TRANSFORM(RECORDOF(LEFT), | ||
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER), | ||
SELF := LEFT)); | ||
DepCmp2 := NORMALIZE(DepCmp1, numWIs, TRANSFORM(RECORDOF(LEFT), | ||
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER), | ||
SELF.number := 1; | ||
SELF := LEFT)); | ||
|
||
// Evaluate the model on the test features against the expected classes. | ||
// The caller reads the raw_accuracy field of this result. | ||
RETURN Forest.Accuracy(model, DepCmp2, IndTest2); | ||
END; | ||
|
||
accuracy := ClassTest(); | ||
|
||
// Both work items should be at least 78% accurate | ||
OUTPUT(accuracy, {passing := IF((COUNT(GROUP, raw_accuracy >= 0.78) = numWIs), 'Pass', 'Fail: ' + raw_accuracy + ' < 0.78')}, NAMED('Result')); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
/*############################################################################## | ||
HPCC SYSTEMS software Copyright (C) 2022 HPCC Systems®. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
############################################################################## */ | ||
|
||
// Updated version of CommonPrefixLenTest that outputs whether the correct | ||
// output is reached or what any differences were, plus additional inputs | ||
|
||
IMPORT $.^ AS LT; | ||
IMPORT LT.Internal AS int; | ||
|
||
// Input sets passed pairwise to CommonPrefixLen | ||
inp1 := [1, 2, 3, 4, 5]; | ||
inp2 := [1, 2, 4, 5, 6, 7]; | ||
inp3 := [1, 2, 3, 4, 5, 6, 7]; | ||
inp4 := [2, 3, 4, 5]; | ||
inp5 := [7, 9, 13, 20]; | ||
inp6 := [7, 9, 13, 20]; | ||
|
||
// Actual results, named Res<a>x<b> for the pair (inp<a>, inp<b>) | ||
Res1x2 := int.CommonPrefixLen(inp1, inp2); | ||
Res1x3 := int.CommonPrefixLen(inp1, inp3); | ||
Res2x3 := int.CommonPrefixLen(inp2, inp3); | ||
Res3x2 := int.CommonPrefixLen(inp3, inp2); // Test that function is symmetric as f(2, 3) should equal f(3, 2) | ||
Res3x4 := int.CommonPrefixLen(inp3, inp4); | ||
Res5x6 := int.CommonPrefixLen(inp5, inp6); // Equal sets, should return the length | ||
|
||
// Expected results for each pair (inp3 and inp4 already differ at the first element) | ||
Expected1x2 := 2; | ||
Expected1x3 := 5; | ||
Expected2x3 := 2; | ||
Expected3x2 := 2; | ||
Expected3x4 := 0; | ||
Expected5x6 := 4; | ||
|
||
// One row per test case: label, expected value and actual value | ||
Test_Result := RECORD | ||
STRING Test; | ||
INTEGER Expected; | ||
INTEGER Result; | ||
END; | ||
|
||
tests := DATASET([{'1x2', Expected1x2, Res1x2}, | ||
{'1x3', Expected1x3, Res1x3}, | ||
{'2x3', Expected2x3, Res2x3}, | ||
{'3x2', Expected3x2, Res3x2}, | ||
{'3x4', Expected3x4, Res3x4}, | ||
{'5x6', Expected5x6, Res5x6}], Test_Result); | ||
|
||
// 'Result' reports overall pass/fail; 'Errors' lists the rows where Expected and Result differ | ||
OUTPUT(IF(COUNT(tests(Expected != Result)) = 0, 'All Tests Passed', 'Test Cases Failed'), NAMED('Result')); | ||
OUTPUT(tests(Expected != Result), NAMED('Errors')); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
/*############################################################################## | ||
HPCC SYSTEMS software Copyright (C) 2022 HPCC Systems®. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
############################################################################## */ | ||
|
||
#ONWARNING(4550, ignore); | ||
|
||
// Modified version of the testCovTypeReg test file that works with the | ||
// OBT test system | ||
|
||
IMPORT $.^.test.datasets.CovTypeDS; | ||
IMPORT $.^ AS LT; | ||
IMPORT LT.LT_Types; | ||
IMPORT ML_Core; | ||
IMPORT ML_Core.Types; | ||
|
||
numTrees := 400; | ||
maxDepth := 255; | ||
numFeatures := 0; // Zero is automatic choice | ||
nonSequentialIds := TRUE; // True to renumber ids, numbers and work-items to test | ||
// support for non-sequentiality | ||
numWIs := 1; // The number of independent work-items to create | ||
maxRecs := 500; // Note that this has to be less than or equal to the number of records | ||
// in CovTypeDS (currently 500) | ||
|
||
maxTestRecs := 100; | ||
NumericField := Types.NumericField; | ||
trainDat := CovTypeDS.trainRecs; | ||
testDat := CovTypeDS.testRecs; | ||
nominalFields := CovTypeDS.nominalCols; | ||
DependentVar := 1; // Dependent Variable meant for this function | ||
|
||
// RegressTest: trains a regression forest on the CovTypeDS training records and | ||
// returns the result of Forest.Accuracy evaluated against a subset of the test records. | ||
// Takes no parameters; it reads the file-level settings numTrees, numFeatures, maxDepth, | ||
// nominalFields, nonSequentialIds, numWIs, maxRecs, maxTestRecs and DependentVar. | ||
RegressTest() := FUNCTION | ||
|
||
ML_Core.ToField(trainDat, trainNF); // Get training data as a field | ||
ML_Core.ToField(testDat, testNF); // Get test data as a field | ||
|
||
// Take out the first field from training set (Elevation) to use as the target value. Re-number the other fields | ||
// to fill the gap | ||
|
||
//Ind = independent, Dep = dependent | ||
// Training features: every field except DependentVar, restricted to ids <= maxRecs. | ||
// Field numbers become number-1, or 5*number-1 when nonSequentialIds is TRUE; | ||
// ids are multiplied by 5 when nonSequentialIds is TRUE. | ||
Ind1 := PROJECT(trainNF(number != DependentVar AND id <= maxRecs), TRANSFORM(NumericField, | ||
SELF.number := IF(nonSequentialIds, (5*LEFT.number -1), LEFT.number -1), | ||
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id), | ||
SELF := LEFT)); | ||
// Training target: the DependentVar field, with its field number set to DependentVar. | ||
Dep1 := PROJECT(trainNF(number = DependentVar AND id <= maxRecs), TRANSFORM(NumericField, | ||
SELF.number := DependentVar, | ||
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id), | ||
SELF := LEFT)); | ||
|
||
// Generate multiple work items | ||
// Each record is emitted numWIs times; wi is COUNTER, or 5*COUNTER when | ||
// nonSequentialIds is TRUE. | ||
Ind2 := NORMALIZE(Ind1, numWIs, TRANSFORM(RECORDOF(LEFT), | ||
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER), | ||
SELF := LEFT)); | ||
Dep2 := NORMALIZE(Dep1, numWIs, TRANSFORM(RECORDOF(LEFT), | ||
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER), | ||
SELF := LEFT)); | ||
|
||
// Configure the forest from the file-level settings and train it on all work items. | ||
Forest := LT.RegressionForest(numTrees:=numTrees, featuresPerNode:=numFeatures, maxDepth:=maxDepth, nominalFields:=nominalFields); | ||
model := Forest.GetModel(Ind2, Dep2); | ||
|
||
// Keep only test records whose id is less than (smallest test id + maxTestRecs). | ||
maxTestId := MIN(testNF, id) + maxTestRecs; | ||
testNF2 := testNF(id < maxTestId); | ||
|
||
// Test features and expected values, renumbered the same way as the training data. | ||
Indtest1 := PROJECT(testNF2(number != DependentVar), TRANSFORM(NumericField, | ||
SELF.number := IF(nonSequentialIds, (5*LEFT.number -1), LEFT.number -1), | ||
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id), | ||
SELF := LEFT)); | ||
DepCmp1 := PROJECT(testNF2(number = DependentVar), TRANSFORM(NumericField, | ||
SELF.number := DependentVar, | ||
SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id), | ||
SELF := LEFT)); | ||
|
||
// Generate multiple work items | ||
IndTest2 := NORMALIZE(IndTest1, numWIs, TRANSFORM(RECORDOF(LEFT), | ||
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER), | ||
SELF := LEFT)); | ||
DepCmp2 := NORMALIZE(DepCmp1, numWIs, TRANSFORM(RECORDOF(LEFT), | ||
SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER), | ||
SELF := LEFT)); | ||
|
||
// Determine accuracy | ||
// The caller reads the r2 field of this result. | ||
RETURN Forest.Accuracy(model, DepCmp2, IndTest2); | ||
END; | ||
|
||
accuracy := RegressTest(); | ||
|
||
// R-squared (r2) must be greater than 0.70 for the test to pass | ||
OUTPUT(accuracy, {passing := IF(r2 > 0.70, 'Pass', 'Fail, ' + r2)}, NAMED('Result')); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
<Dataset name='Result'> | ||
<Row><passing>Pass</passing></Row> | ||
</Dataset> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
<Dataset name='Result'> | ||
<Row><Result>All Tests Passed</Result></Row> | ||
</Dataset> | ||
<Dataset name='Errors'> | ||
</Dataset> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
<Dataset name='Result'> | ||
<Row><passing>Pass</passing></Row> | ||
</Dataset> |