Skip to content

Commit

Permalink
Merge pull request #46 from clarin-eric/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
Wolfgang Walter SAUER authored Jul 12, 2019
2 parents d82085f + 378a188 commit f731e0d
Show file tree
Hide file tree
Showing 32 changed files with 696 additions and 1,527 deletions.
8 changes: 4 additions & 4 deletions curation-module-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<artifactId>curation-module</artifactId>
<groupId>eu.clarin.cmdi</groupId>
<version>3.0</version>
<version>3.1</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>curation-module-core</artifactId>
Expand Down Expand Up @@ -92,8 +92,8 @@
</dependency>
<dependency>
<groupId>eu.clarin.cmdi</groupId>
<artifactId>linkChecker</artifactId>
<version>3.0</version>
<artifactId>linkchecker</artifactId>
<version>3.0.1</version>
</dependency>
<!--<dependency> -->
<!--<groupId>com.github.mfornos</groupId> -->
Expand Down Expand Up @@ -143,5 +143,5 @@
</plugin>
</plugins>
</build>
<version>3.0</version>
<version>3.1</version>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import eu.clarin.cmdi.curation.cr.profile_parser.ProfileParserFactory;
import eu.clarin.cmdi.curation.main.Configuration;
import eu.clarin.cmdi.curation.xml.SchemaResourceResolver;
import eu.clarin.curation.linkchecker.httpLinkChecker.HTTPLinkChecker;
import eu.clarin.cmdi.linkchecker.httpLinkChecker.HTTPLinkChecker;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public class Configuration {
public static String DATABASE_URI;
public static String USERAGENT;
public static String BASE_URL;
public static String CMD_STORAGE_URL;

//this flag is set to false by the core module and to true by the web module
public static boolean enableProfileLoadTimer = false;
Expand Down Expand Up @@ -123,7 +124,6 @@ private static void readProperties(Properties config) throws IOException {

USERAGENT = config.getProperty("USERAGENT");
BASE_URL = config.getProperty("BASE_URL");


CMD_STORAGE_URL = config.getProperty("CMD_STORAGE_URL");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import eu.clarin.cmdi.curation.report.CMDInstanceReport;
import eu.clarin.cmdi.curation.report.Report;
import eu.clarin.cmdi.curation.utils.FileNameEncoder;
import eu.clarin.curation.linkchecker.httpLinkChecker.HTTPLinkChecker;
import eu.clarin.cmdi.linkchecker.httpLinkChecker.HTTPLinkChecker;

import java.io.IOException;
import java.net.MalformedURLException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public static class URLElement {
@XmlAttribute(name = "redirectCount")
public int redirectCount;

public URLElement convertFromLinkCheckerURLElement(eu.clarin.curation.linkchecker.urlElements.URLElement urlElement) {
public URLElement convertFromLinkCheckerURLElement(eu.clarin.cmdi.linkchecker.urlElements.URLElement urlElement) {
url = urlElement.getUrl();
method = urlElement.getMethod();
message = urlElement.getMessage();
Expand Down Expand Up @@ -200,7 +200,7 @@ private static final synchronized void mergeWithParent(CollectionReport parentRe
parentReport.xmlValidationReport.totNumOfValidRecords += instanceReport.xmlValidityReport.valid ? 1 : 0;
if (!instanceReport.xmlValidityReport.valid) {
Record record = new Record();
record.name = instanceReport.getName();
record.name = instanceReport.fileReport.location;
record.issues = instanceReport.xmlValidityReport.issues;
parentReport.xmlValidationReport.record.add(record);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ public static class Profile {
@XmlElementWrapper(name = "facets")
@XmlElement(name = "facet")
private List<Facet> facets = new ArrayList<Facet>();
@XmlElement
private double collectionUsage;
@XmlElement
private double instanceUsage;

public Profile() {

Expand All @@ -108,11 +112,12 @@ public Profile(CMDProfileReport report) {
this.reportName = report.getName();
this.score = report.score;
this.facetCoverage = report.facet.profileCoverage;
this.percOfElementsWithConcept = report.elements.percWithConcept;

this.percOfElementsWithConcept = report.elements.percWithConcept;
this.collectionUsage = report.collectionUsage.size();

report.collectionUsage.forEach(usage -> this.instanceUsage+=usage.count);
report.facet.coverage.forEach(f -> facets.add(new Facet(f.name, f.coveredByProfile)));
}

}

@XmlRootElement
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,15 @@ public void process(CMDInstance entity, CMDInstanceReport report) throws IOExcep
if (entity.getUrl() != null) {
report.fileReport.location = FileNameEncoder.encode(entity.getUrl());
} else {
report.fileReport.location = entity.getPath().toString();
int index = -1;
String path = entity.getPath().toString();

if((index = path.indexOf("/clarin")) != -1)
report.fileReport.location = Configuration.CMD_STORAGE_URL + path.substring(index +1);
else if((index = path.indexOf("/europeana")) != -1)
report.fileReport.location = Configuration.CMD_STORAGE_URL + path.substring(index +1);
else
report.fileReport.location = path;
}

if (report.fileReport.size > Configuration.MAX_FILE_SIZE) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,19 @@
import com.mongodb.client.*;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.IndexOptions;
import com.mongodb.client.model.Indexes;
import eu.clarin.cmdi.curation.entities.CMDInstance;
import eu.clarin.cmdi.curation.main.Configuration;
import eu.clarin.cmdi.curation.report.CMDInstanceReport;
import eu.clarin.cmdi.curation.report.CMDInstanceReport.URLReport;
import eu.clarin.cmdi.curation.report.Score;
import eu.clarin.cmdi.curation.report.Severity;
import eu.clarin.cmdi.curation.utils.TimeUtils;
import eu.clarin.cmdi.linkchecker.httpLinkChecker.HTTPLinkChecker;
import eu.clarin.cmdi.linkchecker.urlElements.URLElement;
import eu.clarin.cmdi.linkchecker.urlElements.URLElementToBeChecked;
import eu.clarin.cmdi.vlo.importer.CMDIData;
import eu.clarin.cmdi.vlo.importer.Resource;
import eu.clarin.cmdi.vlo.importer.processor.ValueSet;
import eu.clarin.curation.linkchecker.httpLinkChecker.HTTPLinkChecker;
import eu.clarin.curation.linkchecker.urlElements.URLElement;
import eu.clarin.curation.linkchecker.urlElements.URLElementToBeChecked;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.slf4j.Logger;
Expand All @@ -31,7 +30,7 @@
import static com.mongodb.client.model.Filters.eq;

/**
*
*/

public class URLValidator extends CMDSubprocessor {
Expand Down Expand Up @@ -118,7 +117,7 @@ public void process(CMDInstance entity, CMDInstanceReport report, String parentN

_logger.info("Checking database for url: " + url);

Bson filter = Filters.and(eq("collection", parentName), eq("record", report.getName()), eq("url", url));
Bson filter = Filters.and(eq("collection", parentName), eq("url", url));
MongoCursor<Document> cursor = linksChecked.find(filter).iterator();

//URLs are unique in the database, so if the cursor has a next element, it is the only match. If not, the URL wasn't found.
Expand Down Expand Up @@ -232,19 +231,21 @@ private void removeOldURLs(Collection<String> links, String recordName, String c
//Some old runs may have produced links that are no longer in the records.
//To clean up the database, we move all such links to history.

Bson filter = Filters.and(Filters.eq("collection", collectionName), Filters.eq("record", recordName));
Bson filter = Filters.and(Filters.eq("collection", collectionName), Filters.eq("record", recordName), Filters.not(Filters.in("url", links)));
MongoCursor<Document> cursor = linksChecked.find(filter).iterator();

while (cursor.hasNext()) {

URLElement urlElement = new URLElement(cursor.next());
String url = urlElement.getUrl();

if (!links.contains(url)) {
moveToHistory(urlElement);
}
moveToHistory(urlElement);

}

//Also remove them from linksToBeChecked so that they are not checked unnecessarily.
linksToBeChecked.deleteMany(filter);


}

private void moveToHistory(URLElement urlElement) {
Expand Down
1 change: 1 addition & 0 deletions curation-module-core/src/main/resources/config.properties
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ SAVE_REPORT=true
OUTPUT_DIRECTORY=
CACHE_DIRECTORY=
COLLECTION_HTML_DIRECTORY=
CMD_STORAGE_URL=https://vlo.clarin.eu/data/
FACETS=languageCode, collection, resourceClass, modality, format, keywords, genre, subject, country, organisation, name, description, license, availability


Expand Down
Loading

0 comments on commit f731e0d

Please sign in to comment.