Skip to content

Commit 0a92ccc

Browse files
committed
issue #270
1 parent b9205e1 commit 0a92ccc

File tree

4 files changed

+33
-17
lines changed

4 files changed

+33
-17
lines changed

curation-api/src/main/java/eu/clarin/cmdi/curation/api/conf/ApiConfig.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ public class ApiConfig {
3838

3939
private String clientPassword;
4040

41-
private int maxThreads;
41+
private int maxInQueue;
4242

4343
@Bean
4444
public VloConfig vloConfig() {

curation-api/src/main/java/eu/clarin/cmdi/curation/api/subprocessor/collection/CollectionAggregator.java

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package eu.clarin.cmdi.curation.api.subprocessor.collection;
22

3+
import eu.clarin.cmdi.curation.api.CurationModule;
34
import eu.clarin.cmdi.curation.api.conf.ApiConfig;
45
import eu.clarin.cmdi.curation.api.entity.CMDCollection;
56
import eu.clarin.cmdi.curation.api.entity.CMDInstance;
@@ -12,6 +13,8 @@
1213
import eu.clarin.cmdi.curation.api.report.collection.sec.ResProxyReport.InvalidReference;
1314
import eu.clarin.cmdi.curation.api.report.instance.CMDInstanceReport;
1415
import eu.clarin.cmdi.curation.api.exception.MalFunctioningProcessorException;
16+
import eu.clarin.cmdi.curation.api.report.profile.CMDProfileReport;
17+
import eu.clarin.cmdi.curation.cr.CRService;
1518
import eu.clarin.linkchecker.persistence.model.AggregatedStatus;
1619
import eu.clarin.linkchecker.persistence.repository.AggregatedStatusRepository;
1720
import eu.clarin.linkchecker.persistence.repository.UrlRepository;
@@ -28,11 +31,9 @@
2831
import java.nio.file.Files;
2932
import java.nio.file.Path;
3033
import java.nio.file.attribute.BasicFileAttributes;
31-
import java.util.ArrayList;
32-
import java.util.Collection;
33-
import java.util.HashMap;
34-
import java.util.Map;
34+
import java.util.*;
3535
import java.util.concurrent.*;
36+
import java.util.concurrent.atomic.AtomicInteger;
3637
import java.util.concurrent.locks.Lock;
3738
import java.util.concurrent.locks.ReentrantLock;
3839
import java.util.stream.Stream;
@@ -52,6 +53,8 @@ public class CollectionAggregator {
5253

5354
private final Map<String, Collection<String>> mdSelfLinks = new HashMap<>();
5455

56+
private final Map<String, AtomicInteger> profileUsage = new HashMap<>();
57+
5558
private final Lock lock = new ReentrantLock();
5659

5760
public CollectionAggregator(ApiConfig conf, ApplicationContext ctx, AggregatedStatusRepository aRep, UrlRepository uRep) {
@@ -77,7 +80,7 @@ public void process(CMDCollection collection, CollectionReport collectionReport)
7780

7881
final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor();
7982
//
80-
final Semaphore maxTreads = new Semaphore(conf.getMaxThreads());
83+
final Semaphore maxInQueue = new Semaphore(conf.getMaxInQueue());
8184

8285
try {
8386
Files.walkFileTree(collection.getPath(), new FileVisitor<>() {
@@ -91,6 +94,13 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) {
9194
@Override
9295
public FileVisitResult visitFile(Path filePath, BasicFileAttributes attrs) {
9396

97+
try {
98+
maxInQueue.acquire();
99+
}
100+
catch (InterruptedException e) {
101+
throw new RuntimeException(e);
102+
}
103+
94104
collectionReport.fileReport.numOfFiles++;
95105

96106
if (attrs.size() > collectionReport.fileReport.maxFileSize) {
@@ -111,16 +121,16 @@ public FileVisitResult visitFile(Path filePath, BasicFileAttributes attrs) {
111121

112122
CMDInstanceReport instanceReport;
113123
try {
114-
maxTreads.acquire();
115124
instanceReport = instance.generateReport();
116125
}
117-
catch (MalFunctioningProcessorException | InterruptedException e) {
126+
catch (MalFunctioningProcessorException e) {
118127
executor.shutdownNow();
119128
throw new RuntimeException(e);
120-
} finally {
121-
maxTreads.release();
122129
}
123-
130+
finally {
131+
maxInQueue.release();
132+
}
133+
CollectionAggregator.this.lock.lock();
124134
addReport(collectionReport, instanceReport);
125135

126136
}); // end executor.execute
@@ -168,8 +178,6 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) {
168178
*/
169179
public void addReport(CollectionReport collectionReport, CMDInstanceReport instanceReport) {
170180

171-
this.lock.lock();
172-
173181
try {
174182
if (!instanceReport.details.isEmpty()) {//only add a record if there are details to report
175183

@@ -254,7 +262,10 @@ public void addReport(CollectionReport collectionReport, CMDInstanceReport insta
254262
.count++
255263
);
256264
collectionReport.facetReport.aggregatedScore += instanceReport.facetReport.score;
257-
} else {
265+
266+
this.profileUsage.computeIfAbsent(instanceReport.profileHeaderReport.getSchemaLocation(), k -> new AtomicInteger()).incrementAndGet();
267+
}
268+
else {
258269
collectionReport.fileReport.numOfFilesNonProcessable++;
259270
}
260271
}
@@ -448,6 +459,11 @@ private void calculateAverages(CollectionReport collectionReport) {
448459
collectionReport.avgScore = collectionReport.aggregatedScore
449460
/ (double) collectionReport.fileReport.numOfFilesProcessable;
450461

462+
// add profileUsage
463+
this.profileUsage.forEach((k,v) -> this.ctx.getBean(CurationModule.class)
464+
.processCMDProfile(k)
465+
.collectionUsage
466+
.add(new CMDProfileReport.CollectionUsage(collectionReport.fileReport.provider, v.get())));
451467
}
452468
}
453469
}

curation-api/src/test/resources/application.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ curation:
5050
out: >
5151
/tmp/public
5252
max_file_size: 100000
53-
maxThreads: 10
53+
max_in_queue: 10
5454
logging:
5555
level:
5656
root: ERROR

curation-app/src/main/resources/application.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,8 @@ curation:
102102
- temporalCoverage
103103
# maximum size of the CMD file. Bigger files are treated as non-processable
104104
max_file_size: 100000
105-
# maximum number of parallel threads in collection processing
106-
maxThreads: ${MAX_THREADS:10}
105+
# maximum number CMDI instances in the processing queue when processing collection
106+
max_in_queue: ${MAX_IN_QUEUE:10}
107107
# URL of the web application
108108
# default:= String ""
109109
#base_url:

0 commit comments

Comments
 (0)