Skip to content

Commit 1a96908

Browse files
committed
Nominal attribute values that are not present in the data are no longer considered in the induction.
1 parent 3a2a3ef commit 1a96908

File tree

5 files changed

+25
-17
lines changed

5 files changed

+25
-17
lines changed

adaa.analytics.rules/build.gradle

+1-1
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ plugins {
55
id 'java'
66
}
77

8-
version = '2.1.18b'
8+
version = '2.1.19'
99
java {
1010
sourceCompatibility = JavaVersion.VERSION_1_8
1111
}

adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/induction/AbstractFinder.java

+9-5
Original file line number | Diff line number | Diff line change
@@ -58,8 +58,8 @@ public abstract class AbstractFinder implements AutoCloseable {
5858

5959
private List<IFinderObserver> observers = new ArrayList<IFinderObserver>();
6060

61-
protected Map<IAttribute, Integer[]> attributeValuesOrder
62-
= new HashMap<IAttribute, Integer[]>();
61+
protected Map<IAttribute, List<Integer>> attributeValuesOrder
62+
= new HashMap<IAttribute, List<Integer>>();
6363

6464
public void addObserver(IFinderObserver o) { observers.add(o); }
6565
public void clearObservers() { observers.clear(); }
@@ -94,17 +94,21 @@ public void preprocess(IExampleSet trainSet) {
9494

9595
for (IAttribute attr : attributes) {
9696

97-
Integer[] valuesOrder = null;
97+
List<Integer> valuesOrder = null;
9898

9999
// check if attribute is nominal
100100
if (attr.isNominal()) {
101101
// get orders
102-
valuesOrder = new Integer[attr.getMapping().size()];
102+
valuesOrder = new ArrayList<Integer>();
103103
List<String> labels = new ArrayList<>();
104104
labels.addAll(attr.getMapping().getValues());
105105
Collections.sort(labels);
106106
for (int j = 0; j < labels.size(); ++j) {
107-
valuesOrder[j] = attr.getMapping().getIndex(labels.get(j));
107+
int index = attr.getMapping().getIndex(labels.get(j));
108+
109+
if (trainSet.getDoubleColumn(attr).contains((double)index)) {
110+
valuesOrder.add(index);
111+
}
108112
}
109113
}
110114

adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/induction/ApproximateClassificationFinder.java

+8-4
Original file line number | Diff line number | Diff line change
@@ -104,12 +104,16 @@ public void preprocess(IExampleSet dataset) {
104104

105105
if (attr.isNominal()) {
106106
// get orders
107-
Integer[] valuesOrder = new Integer[attr.getMapping().size()];
107+
List<Integer> valuesOrder = new ArrayList<Integer>();
108108
List<String> labels = new ArrayList<>();
109109
labels.addAll(attr.getMapping().getValues());
110110
Collections.sort(labels);
111111
for (int j = 0; j < labels.size(); ++j) {
112-
valuesOrder[j] = attr.getMapping().getIndex(labels.get(j));
112+
int index = attr.getMapping().getIndex(labels.get(j));
113+
114+
if (trainSet.getDoubleColumn(attr).contains((double)index)) {
115+
valuesOrder.add(index);
116+
}
113117
}
114118
attributeValuesOrder.put(attr, valuesOrder);
115119
}
@@ -431,8 +435,8 @@ class Stats {
431435
stats[0] = new Stats(0, 0, 0);
432436
stats[1] = new Stats(finalCovered_p - stats[0].p, finalCovered_n - stats[0].n, finalCovered_new_p - stats[0].p_new);
433437

434-
for (int j = 0; j < attr.getMapping().size(); ++j) {
435-
int bid = attributeValuesOrder.get(attr)[j];
438+
for (int j = 0; j < attributeValuesOrder.get(attr).size(); ++j) {
439+
int bid = attributeValuesOrder.get(attr).get(j);
436440

437441
// update stats
438442
stats[0].p = cur_positives[bid];

adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/induction/ClassificationFinder.java

+3-3
Original file line number | Diff line number | Diff line change
@@ -729,13 +729,13 @@ class TotalPosNeg {
729729
} else {
730730
// unweighted case
731731
// try all possible conditions
732-
Integer[] attributeValueOrder = attributeValuesOrder.get(attr);
732+
List<Integer> attributeValueOrder = attributeValuesOrder.get(attr);
733733
Map<Double, IntegerBitSet> precalculatedCovering = precalculatedCoverings.get(attr);
734734
Map<Double, IntegerBitSet> precalculatedCoveringComplement = precalculatedCoveringsComplement.get(attr);
735-
for (int j = 0; j < attr.getMapping().size(); ++j) {
735+
for (int j = 0; j < attributeValueOrder.size(); ++j) {
736736

737737
// evaluate straight condition
738-
int i = attributeValueOrder[j];
738+
int i = attributeValueOrder.get(j);
739739
IntegerBitSet conditionCovered = precalculatedCovering.get((double) i);
740740
double p = conditionCovered.calculateIntersectionSize(rule.getCoveredPositives());
741741
int toCover_p = conditionCovered.calculateIntersectionSize((IntegerBitSet) coveredByRule, (IntegerBitSet) uncoveredPositives);

adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/induction/RegressionFinder.java

+4-4
Original file line number | Diff line number | Diff line change
@@ -241,10 +241,10 @@ class Stats{
241241

242242
} else {
243243
// try all possible conditions
244-
for (int j = 0; j < attr.getMapping().size(); ++j) {
244+
for (int j = 0; j < attributeValuesOrder.get(attr).size(); ++j) {
245245

246246
// evaluate straight condition
247-
int i = attributeValuesOrder.get(attr)[j];
247+
int i = attributeValuesOrder.get(attr).get(j);
248248
ElementaryCondition candidate = new ElementaryCondition(
249249
attr.getName(), new SingletonSet((double)i, attr.getMapping().getValues()));
250250
checkCandidate(dataset, rule, candidate, uncovered, covered, best);
@@ -358,10 +358,10 @@ protected ElementaryCondition induceCondition(
358358
}
359359
} else {
360360
// try all possible conditions
361-
for (int j = 0; j < attr.getMapping().size(); ++j) {
361+
for (int j = 0; j < attributeValuesOrder.get(attr).size(); ++j) {
362362

363363
// evaluate straight condition
364-
int i = attributeValuesOrder.get(attr)[j];
364+
int i = attributeValuesOrder.get(attr).get(j);
365365
ElementaryCondition candidate = new ElementaryCondition(
366366
attr.getName(), new SingletonSet((double)i, attr.getMapping().getValues()));
367367
checkCandidate(dataset, rule, candidate, uncovered, covered, best);

0 commit comments

Comments
 (0)