diff --git a/ecl/ClassificationTestModified.ecl b/ecl/ClassificationTestModified.ecl
new file mode 100644
index 0000000..31f0639
--- /dev/null
+++ b/ecl/ClassificationTestModified.ecl
@@ -0,0 +1,105 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2022 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+#ONWARNING(2007, ignore);
+#ONWARNING(4531, ignore);
+#ONWARNING(4550, ignore);
+
+// Modified version of the testCovTypeClass test file that works with the
+// OBT test system
+
+IMPORT $.^.test.datasets.CovTypeDS;
+IMPORT $.^ AS LT;
+IMPORT LT.LT_Types;
+IMPORT ML_Core;
+IMPORT ML_Core.Types;
+
+numTrees := 100;
+maxDepth := 255;
+numFeatures := 0; // Zero is automatic choice
+balanceClasses := FALSE;
+nonSequentialIds := TRUE; // True to renumber ids, numbers and work-items to test
+                          // support for non-sequentiality
+numWIs := 2; // The number of independent work-items to create
+maxRecs := 5000; // Note that this has to be less than or equal to the number of records
+                 // in CovTypeDS (currently 5000)
+DependentVar := 52; // Dependent Variable meant for this function
+
+
+DiscreteField := Types.DiscreteField;
+NumericField := Types.NumericField;
+trainDat := CovTypeDS.trainRecs;
+testDat := CovTypeDS.testRecs;
+nominalFields := CovTypeDS.nominalCols;
+
+// ClassTest builds a ClassificationForest model from the training records with
+// id <= maxRecs and scores it against the test records using Forest.Accuracy.
+// Field DependentVar is split off as the dependent (class) value and stored as
+// field number 1; every dataset is replicated into numWIs work-items. When
+// nonSequentialIds is TRUE, ids, independent field numbers and work-item ids are scaled by 5.
+// NOTE(review): nominalFields is passed to the forest unscaled while the
+// independent field numbers are scaled by 5 -- confirm this is intended.
+ClassTest() := FUNCTION
+  ML_Core.ToField(trainDat, trainNF); // Get training data as a field
+  ML_Core.ToField(testDat, testNF); // Get test data as a field
+
+  //Ind = independent, Dep = dependent
+  Ind1 := PROJECT(trainNF(number != DependentVar AND id <= maxRecs), TRANSFORM(NumericField,
+            SELF.number := IF(nonSequentialIds, 5*LEFT.number, LEFT.number),
+            SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id),
+            SELF := LEFT));
+  Dep1 := PROJECT(trainNF(number = DependentVar AND id <= maxRecs), TRANSFORM(DiscreteField,
+            SELF.number := 1,
+            SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id),
+            SELF := LEFT));
+
+  // Generate multiple work items
+  Ind2 := NORMALIZE(Ind1, numWIs, TRANSFORM(RECORDOF(LEFT),
+            SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
+            SELF := LEFT));
+  Dep2 := NORMALIZE(Dep1, numWIs, TRANSFORM(RECORDOF(LEFT),
+            SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
+            SELF := LEFT));
+
+  Forest := LT.ClassificationForest(numTrees, numFeatures, maxDepth, nominalFields, balanceClasses);
+  model := Forest.GetModel(Ind2, Dep2);
+
+  IndTest1 := PROJECT(testNF(number != DependentVar), TRANSFORM(NumericField,
+            SELF.number := IF(nonSequentialIds, 5*LEFT.number, LEFT.number),
+            SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id),
+            SELF := LEFT));
+  DepCmp1 := PROJECT(testNF(number = DependentVar), TRANSFORM(DiscreteField,
+            SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id),
+            SELF := LEFT));
+
+  // Generate multiple work items
+  IndTest2 := NORMALIZE(IndTest1, numWIs, TRANSFORM(RECORDOF(LEFT),
+            SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
+            SELF := LEFT));
+  DepCmp2 := NORMALIZE(DepCmp1, numWIs, TRANSFORM(RECORDOF(LEFT),
+            SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
+            SELF.number := 1,
+            SELF := LEFT));
+
+  RETURN Forest.Accuracy(model, DepCmp2, IndTest2);
+END;
+
+accuracy := ClassTest();
+
+// Both work items should be at least 78% accurate. On failure the lowest
+// accuracy across the work items is reported, so the message names a failing item.
+OUTPUT(accuracy, {passing := IF((COUNT(GROUP, raw_accuracy >= 0.78) = numWIs), 'Pass', 'Fail: ' + MIN(GROUP, raw_accuracy) + ' < 0.78')}, NAMED('Result'));
diff --git a/ecl/CommonPrefixTest2.ecl b/ecl/CommonPrefixTest2.ecl
new file mode 100644
index 0000000..a89fa26
--- /dev/null
+++ b/ecl/CommonPrefixTest2.ecl
@@ -0,0 +1,61 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2022 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+// Updated version of CommonPrefixLenTest that outputs whether the correct
+// output is reached or what any differences were, plus additional inputs
+
+IMPORT $.^ AS LT;
+IMPORT LT.Internal AS int;
+
+inp1 := [1, 2, 3, 4, 5];
+inp2 := [1, 2, 4, 5, 6, 7];
+inp3 := [1, 2, 3, 4, 5, 6, 7];
+inp4 := [2, 3, 4, 5];
+inp5 := [7, 9, 13, 20];
+inp6 := [7, 9, 13, 20];
+
+Res1x2 := int.CommonPrefixLen(inp1, inp2);
+Res1x3 := int.CommonPrefixLen(inp1, inp3);
+Res2x3 := int.CommonPrefixLen(inp2, inp3);
+Res3x2 := int.CommonPrefixLen(inp3, inp2); // Test that function is symmetric as f(2, 3) should equal f(3, 2)
+Res3x4 := int.CommonPrefixLen(inp3, inp4);
+Res5x6 := int.CommonPrefixLen(inp5, inp6); // Equal sets, should return the length
+
+Expected1x2 := 2;
+Expected1x3 := 5;
+Expected2x3 := 2;
+Expected3x2 := 2;
+Expected3x4 := 0;
+Expected5x6 := 4;
+
+// One row per test case: its label, the expected value and the value actually returned
+Test_Result := RECORD
+  STRING Test;
+  INTEGER Expected;
+  INTEGER Result;
+END;
+
+tests := DATASET([{'1x2', Expected1x2, Res1x2},
+                  {'1x3', Expected1x3, Res1x3},
+                  {'2x3', Expected2x3, Res2x3},
+                  {'3x2', Expected3x2, Res3x2},
+                  {'3x4', Expected3x4, Res3x4},
+                  {'5x6', Expected5x6, Res5x6}], Test_Result);
+
+// 'Result' is the overall verdict; 'Errors' lists every case whose result differs from the expected value
+OUTPUT(IF(COUNT(tests(Expected != Result)) = 0, 'All Tests Passed', 'Test Cases Failed'), NAMED('Result'));
+OUTPUT(tests(Expected != Result), NAMED('Errors'));
\ No newline at end of file
diff --git a/ecl/RegressionTestModified.ecl b/ecl/RegressionTestModified.ecl
new file mode 100644
index 0000000..5dd7238
--- /dev/null
+++ b/ecl/RegressionTestModified.ecl
@@ -0,0 +1,108 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2022 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+#ONWARNING(4550, ignore);
+
+// Modified version of the testCovTypeReg test file that works with the
+// OBT test system
+
+IMPORT $.^.test.datasets.CovTypeDS;
+IMPORT $.^ AS LT;
+IMPORT LT.LT_Types;
+IMPORT ML_Core;
+IMPORT ML_Core.Types;
+
+numTrees := 400;
+maxDepth := 255;
+numFeatures := 0; // Zero is automatic choice
+nonSequentialIds := TRUE; // True to renumber ids, numbers and work-items to test
+                          // support for non-sequentiality
+numWIs := 1; // The number of independent work-items to create
+maxRecs := 500; // Note that this has to be less than or equal to the number of records
+                // in CovTypeDS (currently 500)
+
+maxTestRecs := 100;
+NumericField := Types.NumericField;
+trainDat := CovTypeDS.trainRecs;
+testDat := CovTypeDS.testRecs;
+nominalFields := CovTypeDS.nominalCols;
+DependentVar := 1; // Dependent Variable meant for this function
+
+// RegressTest builds a RegressionForest model from the training records with
+// id <= maxRecs and scores it, using Forest.Accuracy, against the test records
+// whose id is less than the smallest test id plus maxTestRecs.
+// Field DependentVar is the target; the remaining field numbers are reduced by
+// one. When nonSequentialIds is TRUE, ids and work-item ids are scaled by 5 and
+// independent field numbers become 5*number - 1.
+// NOTE(review): nominalFields is passed unchanged although the independent field numbers are renumbered -- confirm this is intended.
+RegressTest() := FUNCTION
+
+  ML_Core.ToField(trainDat, trainNF); // Get training data as a field
+  ML_Core.ToField(testDat, testNF); // Get test data as a field
+
+  // Take out the first field from training set (Elevation) to use as the target value. Re-number the other fields
+  // to fill the gap
+
+  //Ind = independent, Dep = dependent
+  Ind1 := PROJECT(trainNF(number != DependentVar AND id <= maxRecs), TRANSFORM(NumericField,
+            SELF.number := IF(nonSequentialIds, (5*LEFT.number -1), LEFT.number -1),
+            SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id),
+            SELF := LEFT));
+  Dep1 := PROJECT(trainNF(number = DependentVar AND id <= maxRecs), TRANSFORM(NumericField,
+            SELF.number := DependentVar,
+            SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id),
+            SELF := LEFT));
+
+  // Generate multiple work items
+  Ind2 := NORMALIZE(Ind1, numWIs, TRANSFORM(RECORDOF(LEFT),
+            SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
+            SELF := LEFT));
+  Dep2 := NORMALIZE(Dep1, numWIs, TRANSFORM(RECORDOF(LEFT),
+            SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
+            SELF := LEFT));
+
+  Forest := LT.RegressionForest(numTrees:=numTrees, featuresPerNode:=numFeatures, maxDepth:=maxDepth, nominalFields:=nominalFields);
+  model := Forest.GetModel(Ind2, Dep2);
+
+  maxTestId := MIN(testNF, id) + maxTestRecs;
+  testNF2 := testNF(id < maxTestId);
+
+  IndTest1 := PROJECT(testNF2(number != DependentVar), TRANSFORM(NumericField,
+            SELF.number := IF(nonSequentialIds, (5*LEFT.number -1), LEFT.number -1),
+            SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id),
+            SELF := LEFT));
+  DepCmp1 := PROJECT(testNF2(number = DependentVar), TRANSFORM(NumericField,
+            SELF.number := DependentVar,
+            SELF.id := IF(nonSequentialIds, 5*LEFT.id, LEFT.id),
+            SELF := LEFT));
+
+  // Generate multiple work items
+  IndTest2 := NORMALIZE(IndTest1, numWIs, TRANSFORM(RECORDOF(LEFT),
+            SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
+            SELF := LEFT));
+  DepCmp2 := NORMALIZE(DepCmp1, numWIs, TRANSFORM(RECORDOF(LEFT),
+            SELF.wi := IF(nonSequentialIds, 5*COUNTER, COUNTER),
+            SELF := LEFT));
+
+  // Determine accuracy
+  RETURN Forest.Accuracy(model, DepCmp2, IndTest2);
+END;
+
+accuracy := RegressTest();
+
+// Result should be at least 70% accurate
+OUTPUT(accuracy, {passing := IF(r2 >= 0.70, 'Pass', 'Fail, ' + r2)}, NAMED('Result'));
diff --git a/ecl/key/ClassificationTestModified.xml b/ecl/key/ClassificationTestModified.xml
new file mode 100644
index 0000000..d82dfbf
--- /dev/null
+++ b/ecl/key/ClassificationTestModified.xml
@@ -0,0 +1,3 @@
+
+ Pass
+
\ No newline at end of file
diff --git a/ecl/key/CommonPrefixTest2.xml b/ecl/key/CommonPrefixTest2.xml
new file mode 100644
index 0000000..fa91f63
--- /dev/null
+++ b/ecl/key/CommonPrefixTest2.xml
@@ -0,0 +1,5 @@
+
+ All Tests Passed
+
+
+
\ No newline at end of file
diff --git a/ecl/key/RegressionTestModified.xml b/ecl/key/RegressionTestModified.xml
new file mode 100644
index 0000000..d82dfbf
--- /dev/null
+++ b/ecl/key/RegressionTestModified.xml
@@ -0,0 +1,3 @@
+
+ Pass
+
\ No newline at end of file