Skip to content

Commit 6381829

Browse files
committed
refactor measures
1 parent 749cad4 commit 6381829

File tree

11 files changed

+178
-112
lines changed

11 files changed

+178
-112
lines changed

abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/measure/AbsoluteCoverage.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
import java.util.HashMap;
3030
import java.util.Map;
3131

32-
public class AbsoluteCoverage extends Count<ResourcePair> {
32+
public class AbsoluteCoverage extends LongMeasure<ResourcePair> {
3333

3434
public AbsoluteCoverage() {
3535
super(AV.absoluteCoverage, OM.one);

abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/measure/Ratio.java renamed to abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/measure/BigDecimalMeasure.java

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,29 +23,12 @@
2323
import java.math.BigDecimal;
2424
import java.math.RoundingMode;
2525

26-
public abstract class Ratio<K> extends Measure<K, BigDecimal> {
26+
public abstract class BigDecimalMeasure<K> extends Measure<K, BigDecimal> {
2727

2828
public final static int SCALE = 16;
2929
public final static RoundingMode ROUNDING_MODE = RoundingMode.HALF_UP;
3030

31-
public Ratio(Resource quantity, Resource unit) {
31+
public BigDecimalMeasure(Resource quantity, Resource unit) {
3232
super(quantity, unit);
3333
}
34-
35-
public void setRatioOf(Count<K> numerators, Count<K> denominators) {
36-
for (K key : numerators.keySet()) {
37-
if (denominators.contains(key)) {
38-
BigDecimal numerator = BigDecimal.valueOf(numerators.get(key));
39-
BigDecimal denominator = BigDecimal.valueOf(denominators.get(key));
40-
set(key, numerator.divide(denominator, SCALE, ROUNDING_MODE));
41-
}
42-
}
43-
}
44-
45-
public void setRatioOf(Count<K> numerators, BigDecimal denominator) {
46-
for (K key : numerators.keySet()) {
47-
BigDecimal numerator = BigDecimal.valueOf(numerators.get(key));
48-
set(key, numerator.divide(denominator, SCALE, ROUNDING_MODE));
49-
}
50-
}
5134
}

abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/measure/Completeness.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@
3030
import java.math.RoundingMode;
3131
import java.util.*;
3232

33-
public class Completeness extends Ratio<Resource> {
33+
public class Completeness extends BigDecimalMeasure<Resource> {
3434

3535
public Completeness() {
3636
super(AV.marCompletenessThomas08, OM.one);
3737
}
3838

39-
public static Completeness calculate(AbsoluteCoverage absoluteCoverage, PerDatasetCount deduplicatedCount) {
39+
public static Completeness calculate(AbsoluteCoverage absoluteCoverage, DeduplicatedCount deduplicatedCount) {
4040
Set<ResourcePair> datasetPairs = getDatasetPairsWithSufficientData(absoluteCoverage, deduplicatedCount);
4141
long totalPairwiseOverlap = calculateTotalPairwiseOverlap(datasetPairs, absoluteCoverage);
4242
if (totalPairwiseOverlap != 0) {
@@ -47,7 +47,7 @@ public static Completeness calculate(AbsoluteCoverage absoluteCoverage, PerDatas
4747
return new Completeness(); // empty
4848
}
4949

50-
private static Set<ResourcePair> getDatasetPairsWithSufficientData(AbsoluteCoverage absoluteCoverage, PerDatasetCount deduplicatedCount) {
50+
private static Set<ResourcePair> getDatasetPairsWithSufficientData(AbsoluteCoverage absoluteCoverage, DeduplicatedCount deduplicatedCount) {
5151
Set<ResourcePair> datasetPairs = absoluteCoverage.keySet();
5252
Set<Resource> datasetsWithDeduplicatedCount = deduplicatedCount.keySet();
5353
return ResourcePair.getPairsBothContainedIn(datasetPairs, datasetsWithDeduplicatedCount);
@@ -63,7 +63,7 @@ private static long calculateTotalPairwiseOverlap(Iterable<ResourcePair> dataset
6363
return totalPairwiseOverlap;
6464
}
6565

66-
private static BigDecimal calculateEstimatedPopulationSize(Iterable<ResourcePair> datasetPairs, PerDatasetCount deduplicatedCount, long totalPairwiseOverlap) {
66+
private static BigDecimal calculateEstimatedPopulationSize(Iterable<ResourcePair> datasetPairs, DeduplicatedCount deduplicatedCount, long totalPairwiseOverlap) {
6767
BigDecimal estimatedPopulationSize = BigDecimal.ZERO;
6868
for (ResourcePair datasetPair : datasetPairs) {
6969
BigDecimal deduplicatedCount1 = BigDecimal.valueOf(deduplicatedCount.get(datasetPair.first));
@@ -75,7 +75,7 @@ private static BigDecimal calculateEstimatedPopulationSize(Iterable<ResourcePair
7575
return estimatedPopulationSize;
7676
}
7777

78-
private static Completeness calculateCompleteness(Iterable<Resource> datasets, PerDatasetCount deduplicatedCount, BigDecimal estimatedPopulationSize) {
78+
private static Completeness calculateCompleteness(Iterable<Resource> datasets, DeduplicatedCount deduplicatedCount, BigDecimal estimatedPopulationSize) {
7979
Completeness completeness = new Completeness();
8080
for (Resource dataset : datasets) {
8181
BigDecimal numerator = BigDecimal.valueOf(deduplicatedCount.get(dataset));

abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/measure/Count.java

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,38 +18,26 @@
1818

1919
package de.uni_jena.cs.fusion.abecto.measure;
2020

21-
import org.apache.jena.rdf.model.Resource;
21+
import de.uni_jena.cs.fusion.abecto.vocabulary.AV;
22+
import de.uni_jena.cs.fusion.abecto.vocabulary.OM;
2223

23-
public abstract class Count<K> extends Measure<K, Long> {
24+
import java.util.HashMap;
25+
import java.util.Map;
2426

25-
public Count(Resource quantity, Resource unit) {
26-
super(quantity, unit);
27-
}
28-
29-
public void setAllZero(Iterable<K> keys) {
30-
for (K key: keys) {
31-
setZero(key);
32-
}
33-
}
34-
35-
public void setZero(K key) {
36-
values.put(key, 0L);
37-
}
38-
39-
public void incrementByOrSetOne(K key) {
40-
incrementByOrSet(key, 1L);
41-
}
27+
public class Count extends PerDatasetLongMeasure {
4228

43-
public void incrementByOrSet(K key, long increment) {
44-
values.merge(key, increment, Long::sum);
29+
public Count() {
30+
super(AV.count, OM.one);
4531
}
4632

47-
public void setDifferenceOf(Count<K> minuend, Count<K> subtrahend) {
48-
for (K key : minuend.keySet()) {
49-
if (subtrahend.contains(key)) {
50-
set(key, minuend.get(key) - subtrahend.get(key));
51-
}
33+
public static Map<String, Count> createMapByVariable(Iterable<String> variables) {
34+
Map<String, Count> mapOfCounts = new HashMap<>();
35+
for (String variable : variables) {
36+
Count countOfVariable = new Count();
37+
countOfVariable.setVariable(variable);
38+
mapOfCounts.put(variable, countOfVariable);
5239
}
40+
return mapOfCounts;
5341
}
5442

5543
}
abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/measure/DeduplicatedCount.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*-
2+
* Copyright © 2019-2022 Heinz Nixdorf Chair for Distributed Information Systems,
3+
* Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/)
4+
* Copyright © 2023-2024 Jan Martin Keil ([email protected])
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
-*/
18+
19+
package de.uni_jena.cs.fusion.abecto.measure;
20+
21+
import de.uni_jena.cs.fusion.abecto.vocabulary.AV;
22+
import de.uni_jena.cs.fusion.abecto.vocabulary.OM;
23+
24+
import java.util.HashMap;
25+
import java.util.Map;
26+
27+
public class DeduplicatedCount extends PerDatasetLongMeasure {
28+
29+
public DeduplicatedCount() {
30+
super(AV.deduplicatedCount, OM.one);
31+
}
32+
33+
public static DeduplicatedCount calculate(Count count, DuplicateCount duplicateCount) {
34+
DeduplicatedCount deduplicatedCount = new DeduplicatedCount();
35+
deduplicatedCount.setDifferenceOf(count, duplicateCount);
36+
return deduplicatedCount;
37+
}
38+
39+
public static Map<String, DeduplicatedCount> createMapByVariable(Iterable<String> variables) {
40+
Map<String, DeduplicatedCount> mapOfCounts = new HashMap<>();
41+
for (String variable : variables) {
42+
DeduplicatedCount countOfVariable = new DeduplicatedCount();
43+
countOfVariable.setVariable(variable);
44+
mapOfCounts.put(variable, countOfVariable);
45+
}
46+
return mapOfCounts;
47+
}
48+
49+
}
abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/measure/DuplicateCount.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*-
2+
* Copyright © 2019-2022 Heinz Nixdorf Chair for Distributed Information Systems,
3+
* Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/)
4+
* Copyright © 2023-2024 Jan Martin Keil ([email protected])
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
-*/
18+
19+
package de.uni_jena.cs.fusion.abecto.measure;
20+
21+
import de.uni_jena.cs.fusion.abecto.vocabulary.OM;
22+
23+
public class DuplicateCount extends PerDatasetLongMeasure {
24+
25+
public DuplicateCount() {
26+
super(null, OM.one); // TODO define measure IRI
27+
}
28+
29+
public static DuplicateCount calculate(Count count, DeduplicatedCount deduplicatedCount) {
30+
DuplicateCount duplicateCount = new DuplicateCount();
31+
duplicateCount.setDifferenceOf(count, deduplicatedCount);
32+
return duplicateCount;
33+
}
34+
35+
}
abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/measure/LongMeasure.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*-
2+
* Copyright © 2019-2022 Heinz Nixdorf Chair for Distributed Information Systems,
3+
* Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/)
4+
* Copyright © 2023-2024 Jan Martin Keil ([email protected])
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
-*/
18+
19+
package de.uni_jena.cs.fusion.abecto.measure;
20+
21+
import org.apache.jena.rdf.model.Resource;
22+
23+
public abstract class LongMeasure<K> extends Measure<K, Long> {
24+
25+
public LongMeasure(Resource quantity, Resource unit) {
26+
super(quantity, unit);
27+
}
28+
29+
public void setZero(K key) {
30+
values.put(key, 0L);
31+
}
32+
33+
public void incrementByOrSetOne(K key) {
34+
incrementByOrSet(key, 1L);
35+
}
36+
37+
public void incrementByOrSet(K key, long increment) {
38+
values.merge(key, increment, Long::sum);
39+
}
40+
41+
public void setDifferenceOf(LongMeasure<K> minuend, LongMeasure<K> subtrahend) {
42+
for (K key : minuend.keySet()) {
43+
if (subtrahend.contains(key)) {
44+
set(key, minuend.get(key) - subtrahend.get(key));
45+
}
46+
}
47+
}
48+
49+
}

abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/measure/PerDatasetCount.java renamed to abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/measure/PerDatasetLongMeasure.java

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,25 +23,14 @@
2323
import org.apache.jena.rdf.model.Model;
2424
import org.apache.jena.rdf.model.Resource;
2525

26-
import java.util.HashMap;
2726
import java.util.Map;
2827

29-
public class PerDatasetCount extends Count<Resource> {
28+
public abstract class PerDatasetLongMeasure extends LongMeasure<Resource> {
3029

31-
public PerDatasetCount(Resource quantity, Resource unit) {
30+
public PerDatasetLongMeasure(Resource quantity, Resource unit) {
3231
super(quantity, unit);
3332
}
3433

35-
public static Map<String, PerDatasetCount> createMapByVariable(Iterable<String> variables, Resource quantity, Resource unit) {
36-
Map<String, PerDatasetCount> mapOfCounts = new HashMap<>();
37-
for (String variable : variables) {
38-
PerDatasetCount countOfVariable = new PerDatasetCount(quantity, unit);
39-
countOfVariable.setVariable(variable);
40-
mapOfCounts.put(variable, countOfVariable);
41-
}
42-
return mapOfCounts;
43-
}
44-
4534
public void storeInModel(Aspect aspect, Map<Resource, Model> outputModelsMap) {
4635
for (Resource dataset : keySet()) {
4736
Metadata.addQualityMeasurement(quantity, get(dataset), unit, dataset, variable, aspect.getIri(), outputModelsMap.get(dataset));

abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/measure/RelativeCoverage.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,13 @@
3131
import java.util.Map;
3232
import java.util.Set;
3333

34-
public class RelativeCoverage extends Ratio<ResourceTupel> {
34+
public class RelativeCoverage extends BigDecimalMeasure<ResourceTupel> {
3535

3636
public RelativeCoverage() {
3737
super(AV.relativeCoverage, OM.one);
3838
}
3939

40-
public static RelativeCoverage calculate(AbsoluteCoverage absoluteCoverage, PerDatasetCount deduplicatedCount) {
40+
public static RelativeCoverage calculate(AbsoluteCoverage absoluteCoverage, DeduplicatedCount deduplicatedCount) {
4141
RelativeCoverage relativeCoverage = new RelativeCoverage();
4242
Set<ResourcePair> datasetPairs = getDatasetPairsWithSufficientData(absoluteCoverage, deduplicatedCount);
4343
for (ResourcePair datasetPair : datasetPairs) {
@@ -48,13 +48,13 @@ public static RelativeCoverage calculate(AbsoluteCoverage absoluteCoverage, PerD
4848
return relativeCoverage;
4949
}
5050

51-
private static Set<ResourcePair> getDatasetPairsWithSufficientData(AbsoluteCoverage absoluteCoverage, PerDatasetCount deduplicatedCount) {
51+
private static Set<ResourcePair> getDatasetPairsWithSufficientData(AbsoluteCoverage absoluteCoverage, DeduplicatedCount deduplicatedCount) {
5252
Set<ResourcePair> datasetPairsWithAbsoluteCoverage = absoluteCoverage.keySet();
5353
Set<Resource> datasetsWithDeduplicatedCount = deduplicatedCount.keySet();
5454
return ResourcePair.getPairsBothContainedIn(datasetPairsWithAbsoluteCoverage, datasetsWithDeduplicatedCount);
5555
}
5656

57-
void setRatioForTupel(BigDecimal numerator, PerDatasetCount denominators, Resource assessedDataset, Resource comparedDataset) {
57+
void setRatioForTupel(BigDecimal numerator, DeduplicatedCount denominators, Resource assessedDataset, Resource comparedDataset) {
5858
BigDecimal denominator = BigDecimal.valueOf(denominators.get(comparedDataset));
5959
if (!denominator.equals(BigDecimal.ZERO)) {
6060
BigDecimal value = numerator.divide(denominator, SCALE, ROUNDING_MODE);

abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/processor/PopulationComparisonProcessor.java

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
package de.uni_jena.cs.fusion.abecto.processor;
2020

21-
import java.math.BigDecimal;
2221
import java.util.*;
2322
import java.util.stream.Collectors;
2423
import java.util.stream.Stream;
@@ -28,9 +27,6 @@
2827
import org.apache.jena.rdf.model.Model;
2928
import org.apache.jena.rdf.model.Resource;
3029

31-
import de.uni_jena.cs.fusion.abecto.vocabulary.AV;
32-
import de.uni_jena.cs.fusion.abecto.vocabulary.OM;
33-
3430
/**
3531
* Provides measurements for <strong>number of resources</strong>,
3632
* <strong>absolute coverage</strong>, <strong>relative coverage</strong>, and
@@ -52,22 +48,11 @@ public class PopulationComparisonProcessor extends ComparisonProcessor<Populatio
5248
Set<ResourcePair> datasetPairs;
5349
Set<ResourceTupel> datasetTupels;
5450
Map<Resource, Model> outputMetaModelByDataset;
55-
/**
56-
* Number of covered resources of another dataset, excluding duplicates.
57-
*/
51+
5852
AbsoluteCoverage absoluteCoverage = new AbsoluteCoverage();
59-
/**
60-
* Number of resources in this dataset including duplicates.
61-
*/
62-
PerDatasetCount count = new PerDatasetCount(AV.count, OM.one);
63-
/**
64-
* Number of resource duplicates in this dataset.
65-
*/
66-
PerDatasetCount duplicateCount = new PerDatasetCount(null, OM.one);// TODO define measure IRI
67-
/**
68-
* Number of resources in this dataset excluding duplicates.
69-
*/
70-
PerDatasetCount deduplicatedCount = new PerDatasetCount(AV.deduplicatedCount, OM.one);
53+
Count count = new Count();
54+
DuplicateCount duplicateCount = new DuplicateCount();
55+
DeduplicatedCount deduplicatedCount = new DeduplicatedCount();
7156
RelativeCoverage relativeCoverage;
7257
Completeness completeness;
7358

@@ -88,7 +73,7 @@ void compareAspectPopulation(Aspect aspect) {
8873

8974
measureResourceCounts();
9075
countAndReportCoverageAndDuplicatesAndOmissions(getCorrespondenceGroups());
91-
calculateDeduplicatedCount();
76+
deduplicatedCount = DeduplicatedCount.calculate(count, duplicateCount);
9277
relativeCoverage = RelativeCoverage.calculate(absoluteCoverage, deduplicatedCount);
9378
completeness = Completeness.calculate(absoluteCoverage, deduplicatedCount);
9479

@@ -208,10 +193,6 @@ private void removeFromUnprocessedResources(Map<Resource, Set<Resource>> covered
208193
}
209194
}
210195

211-
private void calculateDeduplicatedCount() {
212-
deduplicatedCount.setDifferenceOf(count, duplicateCount);
213-
}
214-
215196
private void reportOmissionsOfUnprocessedResources() {
216197
for (ResourcePair datasetPair : datasetPairs) {
217198
reportOmissionsOfUnprocessedResourcesForResource(datasetPair.first, datasetPair.second);

0 commit comments

Comments
 (0)