Skip to content

Commit a936b0a

Browse files
authored
Merge pull request #108 from clarin-eric/dev
Dev
2 parents 3de6605 + a1b5eac commit a936b0a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+3189
-2169
lines changed

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
1-
# Clarin Curation Module
1+
# Clarin Curation Dashboard
22

3-
Here is the current deployed instance of Curation Module: https://curation.clarin.eu/
3+
Here is the current deployed instance of Curation Dashboard: https://curation.clarin.eu/
44

5-
The goal of this project is to implement software component for curation and quality assessment which can be integrated in the CLARINs VLO workflow. Project is initialized by Metadata Curation Task Force. Specification for the Curation Module is based on the Metadata Quality Assessement Service proposal. Curation Module validates and normalizes single MD records, repositories and profiles, to assess their quality and to produce reports with different information for different actors in VLO workflow. For implementation this project will use some of the existing CLARIN components.
5+
The goal of this project is to implement a software component for curation and quality assessment which can be integrated into CLARIN's VLO workflow. The project was initiated by the Metadata Curation Task Force. The specification for the Curation Dashboard is based on the Metadata Quality Assessment Service proposal. The Curation Dashboard validates and normalizes single MD records, repositories and profiles, to assess their quality and to produce reports with different information for different actors in the VLO workflow. For its implementation, this project will use some of the existing CLARIN components.
66

7-
### curation module core
8-
Usable as stand-alone application to generate instance/collection reports and as required API in the curation web module
7+
### curation dashboard core
8+
Usable as a stand-alone application to generate instance/collection reports, and as the required API for the curation web application
99

10-
### curation module web
10+
### curation dashboard web
1111
Deployable web application
1212

1313
### link checker

curation-module-core/pom.xml

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,22 @@
44
<parent>
55
<artifactId>curation-module</artifactId>
66
<groupId>eu.clarin.cmdi</groupId>
7-
<version>5.1.2</version>
7+
<version>5.2.0</version>
88
</parent>
99
<modelVersion>4.0.0</modelVersion>
1010
<artifactId>curation-module-core</artifactId>
11-
<version>5.1.2</version>
11+
<version>5.2.0</version>
1212
<properties>
1313
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
14-
<slf4j.version>1.7.25</slf4j.version>
15-
<rasa.version>4.0.1</rasa.version>
14+
<log4j.version>2.17.1</log4j.version>
15+
<rasa.version>4.1.0</rasa.version>
1616
</properties>
1717
<dependencies>
18-
18+
<dependency>
19+
<groupId>org.apache.logging.log4j</groupId>
20+
<artifactId>log4j-slf4j-impl</artifactId>
21+
<version>${log4j.version}</version>
22+
</dependency>
1923
<!--java 11 needed dependencies -->
2024
<dependency>
2125
<groupId>javax.xml.bind</groupId>
@@ -39,16 +43,6 @@
3943
</dependency>
4044
<!--java 11 dependencies end -->
4145

42-
<dependency>
43-
<groupId>org.slf4j</groupId>
44-
<artifactId>slf4j-api</artifactId>
45-
<version>${slf4j.version}</version>
46-
</dependency>
47-
<dependency>
48-
<groupId>org.slf4j</groupId>
49-
<artifactId>slf4j-log4j12</artifactId>
50-
<version>${slf4j.version}</version>
51-
</dependency>
5246
<dependency>
5347
<groupId>org.apache.commons</groupId>
5448
<artifactId>commons-lang3</artifactId>
@@ -57,7 +51,7 @@
5751
<dependency>
5852
<groupId>com.google.code.gson</groupId>
5953
<artifactId>gson</artifactId>
60-
<version>2.8.6</version>
54+
<version>2.8.9</version>
6155
</dependency>
6256
<dependency>
6357
<groupId>commons-io</groupId>
@@ -89,17 +83,7 @@
8983
<artifactId>httpclient</artifactId>
9084
<version>[4.5.13,)</version>
9185
</dependency>
92-
<dependency>
93-
<groupId>ch.vorburger.mariaDB4j</groupId>
94-
<artifactId>mariaDB4j</artifactId>
95-
<version>2.4.0</version>
96-
<scope>test</scope>
97-
</dependency>
98-
<dependency>
99-
<groupId>org.apache.ibatis</groupId>
100-
<artifactId>ibatis-core</artifactId>
101-
<version>3.0</version>
102-
</dependency>
86+
10387

10488
<!--github link: https://github.com/acdh-oeaw/stormychecker -->
10589
<!-- <dependency> -->
@@ -120,6 +104,14 @@
120104
resource-availability-status-api
121105
</artifactId>
122106
</exclusion>
107+
<exclusion>
108+
<groupId>log4j</groupId>
109+
<artifactId>log4j</artifactId>
110+
</exclusion>
111+
<exclusion>
112+
<groupId>org.slf4j</groupId>
113+
<artifactId>slf4j-log4j12</artifactId>
114+
</exclusion>
123115
</exclusions>
124116
</dependency>
125117
<dependency>

curation-module-core/script/weeklyUpdate.sh renamed to curation-module-core/script/updateDashboard.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ RESULTSETS="clarin.tar.bz2 others.tar.bz2 europeana.tar.bz2"
1212
#RESULTSETS="clarin.tar.bz2"
1313
CMDI_PATH=results/cmdi
1414

15-
LOG4J=-Dlog4j.configuration=file:$CONF_DIR/log4j.properties
1615
VM_ARGS="-Xms4G -Xmx8G -XX:+UseG1GC -XX:-UseParallelGC -XX:+UseStringDeduplication -XX:MaxHeapFreeRatio=20 -XX:MinHeapFreeRatio=10 -XX:GCTimeRatio=20"
1716

1817
XSD_CACHE=$WORK_DIR/xsd_cache
@@ -50,7 +49,7 @@ set -e
5049
#done
5150

5251
echo "generating new reports, downloading necessary profiles..."
53-
java $VM_ARGS -Dprojectname=curate $LOG4J -jar $BIN_DIR/curate.jar -config $CONF_DIR/config.properties -r -path $DATA_DIR/clarin/$CMDI_PATH $DATA_DIR/europeana/$CMDI_PATH
52+
java $VM_ARGS -Dprojectname=curate -jar $BIN_DIR/curate.jar -config $CONF_DIR/config.properties -r -path $DATA_DIR/clarin/$CMDI_PATH $DATA_DIR/europeana/$CMDI_PATH
5453
echo "report generation finished."
5554

5655
if [ -e "$BIN_DIR/vlo-mapping-creator.jar" ]; then

curation-module-core/src/main/java/eu/clarin/cmdi/curation/main/Configuration.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ public class Configuration {
3434
public static Path OUTPUT_DIRECTORY = null;
3535
public static Path CACHE_DIRECTORY = null;
3636
public static int THREADPOOL_SIZE = 100;
37+
public static String LINK_DATA_SOURCE;
3738
public static Collection<String> FACETS = null;
3839
public static int REDIRECT_FOLLOW_LIMIT;
3940
public static int TIMEOUT;
@@ -66,8 +67,6 @@ public static void initDefault() throws IOException {
6667
Properties config = new Properties();
6768
config.load(Configuration.class.getResourceAsStream("/config.properties"));
6869
readProperties(config);
69-
//readProperties(new PropertiesConfiguration("config.properties"));
70-
7170
}
7271

7372
public static void tearDown() {
@@ -93,6 +92,8 @@ private static void readProperties(Properties config) throws IOException {
9392
TIMEOUT = Integer.parseInt(timeout);
9493
}
9594
THREADPOOL_SIZE = Integer.parseInt(config.getProperty("THREADPOOL_SIZE", "100"));
95+
96+
LINK_DATA_SOURCE = config.getProperty("LINK_DATA_SOURCE");
9697

9798
String[] facets = config.getProperty("FACETS").split(",");
9899
FACETS = Arrays.stream(facets).map(String::trim).collect(Collectors.toList());

curation-module-core/src/main/java/eu/clarin/cmdi/curation/main/Main.java

Lines changed: 24 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ public static void main(String[] args) throws Exception {
182182
// dumping the collections table
183183
dumpAsXML(collectionsReport, CurationEntityType.COLLECTION);
184184
dumpAsHTML(collectionsReport, CurationEntityType.COLLECTION);
185-
dumpAsTSV(collectionsReport, CurationEntityType.COLLECTION);
185+
// dumpAsTSV(collectionsReport, CurationEntityType.COLLECTION);
186186

187187
LOG.info("Creating collections table finished.");
188188

@@ -200,7 +200,7 @@ public static void main(String[] args) throws Exception {
200200
//dumping the profiles table
201201
dumpAsXML(profilesReport, CurationEntityType.PROFILE);
202202
dumpAsHTML(profilesReport, CurationEntityType.PROFILE);
203-
dumpAsTSV(profilesReport, CurationEntityType.PROFILE);
203+
// dumpAsTSV(profilesReport, CurationEntityType.PROFILE);
204204
LOG.info("Creating profiles table finished..");
205205

206206
LOG.info("Creating statistics table...");
@@ -287,33 +287,28 @@ private static void dumpAsHTML(Report<?> report, CurationEntityType type) throws
287287

288288
}
289289

290-
private static void dumpAsTSV(Report<?> report, CurationEntityType type) throws TransformerException, JAXBException, IOException {
291-
Path path = Configuration.OUTPUT_DIRECTORY.resolve("tsv");
292-
293-
switch (type) {
294-
case PROFILE:
295-
path = path.resolve("profiles");
296-
break;
297-
case INSTANCE:
298-
path = path.resolve("instances");
299-
break;
300-
case COLLECTION:
301-
path = path.resolve("collections");
302-
break;
303-
default:
304-
break;
305-
}
306-
307-
Files.createDirectories(path);
308-
String filename = FileNameEncoder.encode(report.getName()) + ".tsv";
309-
path = path.resolve(filename);
310-
311-
TransformerFactory factory = TransformerFactory.newInstance();
312-
Source xslt = new StreamSource(Main.class.getResourceAsStream("/xslt/" + report.getClass().getSimpleName() + "2TSV.xsl"));
313-
314-
Transformer transformer = factory.newTransformer(xslt);
315-
transformer.transform(new JAXBSource(JAXBContext.newInstance(report.getClass()), report), new StreamResult(path.toFile()));
316-
}
290+
/*
291+
* private static void dumpAsTSV(Report<?> report, CurationEntityType type)
292+
* throws TransformerException, JAXBException, IOException { Path path =
293+
* Configuration.OUTPUT_DIRECTORY.resolve("tsv");
294+
*
295+
* switch (type) { case PROFILE: path = path.resolve("profiles"); break; case
296+
* INSTANCE: path = path.resolve("instances"); break; case COLLECTION: path =
297+
* path.resolve("collections"); break; default: break; }
298+
*
299+
* Files.createDirectories(path); String filename =
300+
* FileNameEncoder.encode(report.getName()) + ".tsv"; path =
301+
* path.resolve(filename);
302+
*
303+
* TransformerFactory factory = TransformerFactory.newInstance(); Source xslt =
304+
* new StreamSource(Main.class.getResourceAsStream("/xslt/" +
305+
* report.getClass().getSimpleName() + "2TSV.xsl"));
306+
*
307+
* Transformer transformer = factory.newTransformer(xslt);
308+
* transformer.transform(new
309+
* JAXBSource(JAXBContext.newInstance(report.getClass()), report), new
310+
* StreamResult(path.toFile())); }
311+
*/
317312

318313
private static Options createHelpOption() {
319314
Option help = new Option("help", "print this message");

curation-module-core/src/main/java/eu/clarin/cmdi/curation/report/CMDInstanceReport.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ public class CMDInstanceReport implements Report<CollectionReport> {
4242
@XmlAttribute(name = "score-percentage")
4343
public double scorePercentage;
4444

45-
@XmlAttribute
46-
public String timeStamp = TimeUtils.humanizeToDate(System.currentTimeMillis());
45+
@XmlAttribute(name = "creation-time")
46+
public String creationTime = TimeUtils.humanizeToDate(System.currentTimeMillis());
4747

4848
// sub reports **************************************
4949

curation-module-core/src/main/java/eu/clarin/cmdi/curation/report/CollectionReport.java

Lines changed: 13 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import eu.clarin.cmdi.curation.utils.CategoryColor;
55
import eu.clarin.cmdi.curation.utils.TimeUtils;
66
import eu.clarin.cmdi.curation.xml.XMLMarshaller;
7+
import eu.clarin.cmdi.rasa.DAO.CheckedLink;
78
import eu.clarin.cmdi.rasa.DAO.Statistics.CategoryStatistics;
89
import eu.clarin.cmdi.rasa.filters.CheckedLinkFilter;
910
import org.slf4j.Logger;
@@ -130,71 +131,6 @@ public String getParentName() {
130131
@Override
131132
public void mergeWithParent(CollectionReport parentReport) {
132133
LOG.error("this should never happen??? a collection report cant have a parent to get merged into");
133-
// parentReport.score += score;
134-
// if (insMinScore < parentReport.insMinScore)
135-
// parentReport.insMinScore = insMinScore;
136-
//
137-
// if (insMaxScore > parentReport.insMaxScore)
138-
// parentReport.insMaxScore = insMaxScore;
139-
//
140-
// // ResProxies
141-
//
142-
// parentReport.resProxyReport.totNumOfResProxies += resProxyReport.totNumOfResProxies;
143-
// parentReport.resProxyReport.totNumOfResourcesWithMime += resProxyReport.totNumOfResourcesWithMime;
144-
// parentReport.resProxyReport.totNumOfResProxiesWithReferences += resProxyReport.totNumOfResProxiesWithReferences;
145-
//
146-
// // XMLValidator
147-
// parentReport.xmlValidationReport.totNumOfRecords += xmlValidationReport.totNumOfRecords;
148-
// parentReport.xmlValidationReport.totNumOfValidRecords += xmlValidationReport.totNumOfValidRecords;
149-
// parentReport.xmlValidationReport.record.addAll(this.xmlValidationReport.record);
150-
//
151-
// // XMLPopulatedValidator
152-
// parentReport.xmlPopulatedReport.totNumOfXMLElements += xmlPopulatedReport.totNumOfXMLElements;
153-
// parentReport.xmlPopulatedReport.totNumOfXMLSimpleElements += xmlPopulatedReport.totNumOfXMLSimpleElements;
154-
// parentReport.xmlPopulatedReport.totNumOfXMLEmptyElement += xmlPopulatedReport.totNumOfXMLEmptyElement;
155-
//
156-
// // URL
157-
// parentReport.urlReport.totNumOfLinks += urlReport.totNumOfLinks;
158-
//// parentReport.urlReport.totNumOfUniqueLinks += urlReport.totNumOfUniqueLinks;
159-
// parentReport.urlReport.totNumOfCheckedLinks += urlReport.totNumOfCheckedLinks;
160-
//// parentReport.urlReport.totNumOfResProxiesLinks += urlReport.totNumOfResProxiesLinks;
161-
// parentReport.urlReport.totNumOfBrokenLinks += urlReport.totNumOfBrokenLinks;
162-
//
163-
// // Facet
164-
// facetReport.facet.forEach(facet -> {
165-
// FacetCollectionStruct parFacet = parentReport.facetReport.facet.stream().filter(f -> f.name.equals(facet.name)).findFirst().orElse(null);
166-
// parFacet.cnt += facet.cnt;
167-
// });
168-
//
169-
// // Profiles
170-
// for (Profile p : headerReport.profiles.profiles)
171-
// parentReport.handleProfile(p);
172-
//
173-
// // MDSelfLinks
174-
// if (headerReport.duplicatedMDSelfLink != null && !headerReport.duplicatedMDSelfLink.isEmpty()) {
175-
//
176-
// if (parentReport.headerReport.duplicatedMDSelfLink == null) {
177-
// parentReport.headerReport.duplicatedMDSelfLink = new ArrayList<>();
178-
// }
179-
//
180-
// for (String mdSelfLink : headerReport.duplicatedMDSelfLink)
181-
// if (!parentReport.headerReport.duplicatedMDSelfLink.contains(mdSelfLink))
182-
// parentReport.headerReport.duplicatedMDSelfLink.add(mdSelfLink);
183-
// }
184-
//
185-
// // invalid files
186-
// if (this.file != null) {
187-
// if (parentReport.file == null)
188-
// parentReport.file = new ArrayList<>();
189-
// parentReport.file.addAll(this.file);
190-
// }
191-
192-
// // urls
193-
// if (this.url != null) {
194-
// if (parentReport.url == null)
195-
// parentReport.url = new ArrayList<>();
196-
// parentReport.url.addAll(this.url);
197-
// }
198134

199135
}
200136

@@ -288,10 +224,21 @@ public void calculateAverageValues() {
288224
urlReport.maxRespTime = statistics.getMaxRespTime();
289225
}
290226

291-
} catch (SQLException e) {
227+
}
228+
catch (SQLException e) {
292229
LOG.error("There was a problem calculating average values: " + e.getMessage(), e);
293230
}
231+
232+
// creating zip file for download
233+
filter = Configuration.checkedLinkResource.getCheckedLinkFilter().setProviderGroupIs(getName()).setIsActive(true);
294234

235+
try (Stream<CheckedLink> stream = Configuration.checkedLinkResource.get(filter)){
236+
237+
238+
}
239+
catch(Exception ex) {
240+
LOG.error("couldn't zip file for provider group '{}' from database", getName());
241+
}
295242

296243
int totCheckedUndeterminedAndRestrictedAndBlockedRemoved = urlReport.totNumOfCheckedLinks - (urlReport.totNumOfUndeterminedLinks + urlReport.totNumOfRestrictedAccessLinks + urlReport.totNumOfBlockedByRobotsTxtLinks);
297244

0 commit comments

Comments
 (0)