Skip to content

Commit

Permalink
Merge branch 'dev' into main
Browse files: browse the repository at this point in the history
  • Loading branch information
wowasa committed Apr 4, 2023
2 parents 6506176 + a251be8 commit c93d295
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 13 deletions.
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# version 3.0.1
- bugfix in class MetricsFetcherBolt: null-safe handling of the exception message of a failed fetch
- bugfix in dependency linkchecker-persistence (updated to version 0.0.3)

# version 3.0.0
- replacement of the persistence layer: the [resource availability status API (RASA)](https://github.com/clarin-eric/resource-availability-status-api)
is replaced by [curation-persistence](https://github.com/clarin-eric/curation-persistence)
Expand Down
4 changes: 2 additions & 2 deletions crawler-test-conf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ config:
- fetch.redirectCount
- http.method.head

http.agent.name: "CLARIN Linkchecker: https://www.clarin.eu/linkchecker (!!! TEST !!!)"
http.agent.name: "CLARIN Linkchecker: https://www.clarin.eu/linkchecker"
http.agent.version: "2.4"
http.agent.description: "built with StormCrawler Archetype 2.4"
http.agent.url: "https://www.clarin.eu/linkchecker"
Expand Down Expand Up @@ -128,7 +128,7 @@ config:
spring.datasource.username: ${ENV-MYSQL_USER}
spring.datasource.password: ${ENV-MYSQL_PASSWORD}
spring.datasource.driver-class-name: org.mariadb.jdbc.Driver
spring.jpa.show-sql: true
spring.jpa.show-sql: false
spring.jpa.hibernate.ddl-auto: none
spring.database-platform: org.hibernate.dialect.MariaDBDialect

Expand Down
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
<modelVersion>4.0.0</modelVersion>
<groupId>eu.clarin.cmdi</groupId>
<artifactId>linkchecker</artifactId>
<version>3.0.0</version>
<version>3.0.1</version>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<storm.version>2.4.0</storm.version>
<stormcrawler.version>2.4</stormcrawler.version>
<lombok.version>1.18.24</lombok.version>
<linkchecker-persistence.version>0.0.2</linkchecker-persistence.version>
<linkchecker-persistence.version>0.0.3</linkchecker-persistence.version>
</properties>
<distributionManagement>
<snapshotRepository>
Expand Down Expand Up @@ -195,7 +195,7 @@
<dependency>
<groupId>org.mariadb.jdbc</groupId>
<artifactId>mariadb-java-client</artifactId>
<version>3.1.2</version>
<version>3.1.3</version>
</dependency>
</dependencies>
<repositories>
Expand Down
10 changes: 3 additions & 7 deletions src/main/java/eu/clarin/linkchecker/bolt/MetricsFetcherBolt.java
Original file line number Diff line number Diff line change
Expand Up @@ -659,13 +659,11 @@ else if (Configuration.restrictedAccessStatusCodes.contains(response.getStatusCo
continue;
}

String message = exece.getMessage();
if (message == null)
message = "";
String message = exece.getMessage();

// common exceptions for which we log only a short message
if (exece.getCause() instanceof java.util.concurrent.TimeoutException
|| message.contains(" timed out")) {
|| (message != null && message.contains(" timed out"))) {
log.debug("Socket timeout fetching {}", fit.url);
message = "Socket timeout fetching";
}
Expand All @@ -690,9 +688,7 @@ else if (exece.getCause() instanceof java.net.UnknownHostException

metadata.setValue("fetch.category", getCategoryFromException(exece, fit.url).name());

metadata.setValue("fetch.message", exece.getMessage());

// metadata.setValue("fetch.startTime", Long.toString(start));
metadata.setValue("fetch.message", message);

// send to status stream
collector.emit(Constants.StatusStreamName, fit.t, new Values(fit.url, metadata, Status.DISCOVERED));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ public synchronized void store(String url, Status status, Metadata metadata, Opt
eu.clarin.linkchecker.persistence.model.Status statusEntity = new eu.clarin.linkchecker.persistence.model.Status(
urlEntity,
Category.valueOf(md.getFirstValue("fetch.category")),
md.getFirstValue("fetch.message"),
md.getFirstValue("fetch.message").length() < 1024?md.getFirstValue("fetch.message"): md.getFirstValue("fetch.message").subSequence(0, 1017) + "[...]",
md.getFirstValue("fetch.startTime") != null?
Instant.ofEpochMilli(Long.parseLong(md.getFirstValue("fetch.startTime"))).atZone(ZoneId.systemDefault()).toLocalDateTime()
: LocalDateTime.now()
Expand Down Expand Up @@ -131,6 +131,7 @@ public synchronized void store(String url, Status status, Metadata metadata, Opt
}
catch (Exception ex) {
log.error("can't save checked link \n{}", statusEntity);
log.error("metadata:\n" + md.toString());
_collector.fail(t);
}
}
Expand Down

0 comments on commit c93d295

Please sign in to comment.