From 8341243ef5619084ab0b56cf629b3158ba3a7aa6 Mon Sep 17 00:00:00 2001 From: Robert Randiak Date: Sat, 5 Aug 2023 20:24:17 +0200 Subject: [PATCH 01/10] optimize RebuildProcessing process memory usage --- .../resourceindex/ProcessingIndexRebuild.java | 75 ++++++++++++++++--- 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java b/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java index e6143df7d..75d95f0fd 100644 --- a/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java +++ b/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java @@ -20,6 +20,7 @@ import cz.incad.kramerius.utils.conf.KConfiguration; import org.apache.commons.io.IOUtils; import org.apache.solr.client.solrj.SolrServerException; +import org.checkerframework.checker.units.qual.C; import org.xml.sax.SAXException; import javax.xml.bind.JAXBContext; @@ -28,11 +29,11 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; -import java.nio.file.FileVisitOption; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; +import java.nio.file.*; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.Collections; import java.util.List; +import java.util.concurrent.ForkJoinPool; import java.util.logging.Level; import java.util.logging.Logger; @@ -75,16 +76,66 @@ public static void main(String[] args) throws IOException, SolrServerException, } else { objectStoreRoot = Paths.get(KConfiguration.getInstance().getProperty("objectStore.path")); } - Files.walk(objectStoreRoot, FileVisitOption.FOLLOW_LINKS).parallel().filter(Files::isRegularFile).forEach(path -> { - String filename = path.toString(); - try { - FileInputStream inputStream = new FileInputStream(path.toFile()); - DigitalObject digitalObject = createDigitalObject(inputStream); - rebuildProcessingIndex(feeder, digitalObject); - } catch (Exception ex) { - LOGGER.log(Level.SEVERE, "Error processing file: " + filename, ex); + + // ForkJoinPool is used to preserve parallelization. + // The default constructor of ForkJoinPool creates a pool with parallelism \ + // equal to Runtime.availableProcessors(), same as parallel streams. + ForkJoinPool forkJoinPool = new ForkJoinPool(); + + // Files.walkFileTree() is used because it does not store any Paths in memory, + // which makes it a more efficient solution to the problem compared to Files.walk(). + // ForkJoinPool.submit() does not allow running more threads than there are available processors. + Files.walkFileTree(objectStoreRoot, + Collections.singleton(FileVisitOption.FOLLOW_LINKS), + Integer.MAX_VALUE, + new FileVisitor() { + @Override + public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException { + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + if (!Files.isRegularFile(file)) { + return FileVisitResult.CONTINUE; + } + + forkJoinPool.submit(() -> { + String filename = file.toString(); + try { + FileInputStream inputStream = new FileInputStream(file.toFile()); + DigitalObject digitalObject = createDigitalObject(inputStream); + rebuildProcessingIndex(feeder, digitalObject); + } catch (Exception ex) { + LOGGER.log(Level.SEVERE, "Error processing file: " + filename, ex); + } + }); + + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { + return FileVisitResult.TERMINATE; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + return FileVisitResult.TERMINATE; } }); + +// Files.walk(objectStoreRoot, FileVisitOption.FOLLOW_LINKS).parallel().filter(Files::isRegularFile).forEach(path -> { +// String filename = path.toString(); +// try { +// FileInputStream inputStream = new FileInputStream(path.toFile()); +// DigitalObject digitalObject = createDigitalObject(inputStream); +// rebuildProcessingIndex(feeder, digitalObject); +// } catch (Exception ex) { +// LOGGER.log(Level.SEVERE, "Error processing file: " + filename, ex); +// } +// }); + LOGGER.info("Finished tree walk in " + (System.currentTimeMillis() - start) + " ms"); fa.shutdown(); From 2ea588c8c58118c704c1198249aa076c0ffac7ff Mon Sep 17 00:00:00 2001 From: Robert Randiak Date: Sat, 5 Aug 2023 20:30:51 +0200 Subject: [PATCH 02/10] RebuildProcessing process - handle permission errors --- .../resourceindex/ProcessingIndexRebuild.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java b/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java index 75d95f0fd..846687efc 100644 --- a/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java +++ b/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java @@ -116,12 +116,22 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO @Override public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { - return FileVisitResult.TERMINATE; + LOGGER.log(Level.SEVERE, "Error processing file: " + file.toString(), exc); + + // This will allow the execution to continue uninterrupted, + // even in the event of encountering permission errors. + return FileVisitResult.CONTINUE; } @Override public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { - return FileVisitResult.TERMINATE; + if (exc != null) { + LOGGER.log(Level.SEVERE, "Error searching directory : " + dir.toString(), exc); + } + + // This will allow the execution to continue uninterrupted, + // even in the event of encountering permission errors. + return FileVisitResult.CONTINUE; } }); From d132482f1b7814ab40f2484f4316e767c28f1870 Mon Sep 17 00:00:00 2001 From: Robert Randiak Date: Tue, 8 Aug 2023 06:42:56 +0200 Subject: [PATCH 03/10] RebuildProcessing process - fix closing FileInputStream --- .../resourceindex/ProcessingIndexRebuild.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java b/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java index 846687efc..cf55cbe93 100644 --- a/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java +++ b/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java @@ -84,7 +84,7 @@ public static void main(String[] args) throws IOException, SolrServerException, // Files.walkFileTree() is used because it does not store any Paths in memory, // which makes it a more efficient solution to the problem compared to Files.walk(). - // ForkJoinPool.submit() does not allow running more threads than there are available processors. + // ForkJoinPool.execute() does not allow running more threads than there are available processors. Files.walkFileTree(objectStoreRoot, Collections.singleton(FileVisitOption.FOLLOW_LINKS), Integer.MAX_VALUE, @@ -100,8 +100,14 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO return FileVisitResult.CONTINUE; } - forkJoinPool.submit(() -> { + forkJoinPool.execute(() -> { String filename = file.toString(); + try (FileInputStream inputStream = new FileInputStream(file.toFile())) { + DigitalObject digitalObject = createDigitalObject(inputStream); + rebuildProcessingIndex(feeder, digitalObject); + } catch (Exception ex) { + LOGGER.log(Level.SEVERE, "Error processing file: " + filename, ex); + } try { FileInputStream inputStream = new FileInputStream(file.toFile()); DigitalObject digitalObject = createDigitalObject(inputStream); From 5d7561af1cb9a1515d2e2f4b7210acf753a3d272 Mon Sep 17 00:00:00 2001 From: Robert Randiak Date: Mon, 14 Aug 2023 13:09:27 +0200 Subject: [PATCH 04/10] fix RebuildProcessing's ForkJoinPool queue growing indefinitely --- .../resourceindex/ProcessingIndexRebuild.java | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java b/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java index cf55cbe93..8c86a7e21 100644 --- a/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java +++ b/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java @@ -41,6 +41,11 @@ * Deklarace procesu je v shared/common/src/main/java/cz/incad/kramerius/processes/res/lp.st (processing_rebuild) */ public class ProcessingIndexRebuild { + // Could be any number between 100 and 500,000. Lower the number, lower memory usage. + // If it was too low, parallelization would be less effective. + // If it was too large, memory usage would slower overall execution, because of memory management. + private static final int MAX_QUEUED_SUBMITTED_TASKS = 10000; + public static final Logger LOGGER = Logger.getLogger(ProcessingIndexCheck.class.getName()); private static Unmarshaller unmarshaller = null; @@ -84,7 +89,6 @@ public static void main(String[] args) throws IOException, SolrServerException, // Files.walkFileTree() is used because it does not store any Paths in memory, // which makes it a more efficient solution to the problem compared to Files.walk(). - // ForkJoinPool.execute() does not allow running more threads than there are available processors. Files.walkFileTree(objectStoreRoot, Collections.singleton(FileVisitOption.FOLLOW_LINKS), Integer.MAX_VALUE, @@ -100,7 +104,17 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO return FileVisitResult.CONTINUE; } - forkJoinPool.execute(() -> { + if (forkJoinPool.getQueuedSubmissionCount() < MAX_QUEUED_SUBMITTED_TASKS) { + forkJoinPool.execute(() -> { + String filename = file.toString(); + try (FileInputStream inputStream = new FileInputStream(file.toFile())) { + DigitalObject digitalObject = createDigitalObject(inputStream); + rebuildProcessingIndex(feeder, digitalObject); + } catch (Exception ex) { + LOGGER.log(Level.SEVERE, "Error processing file: " + filename, ex); + } + }); + } else { String filename = file.toString(); try (FileInputStream inputStream = new FileInputStream(file.toFile())) { DigitalObject digitalObject = createDigitalObject(inputStream); @@ -108,14 +122,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO } catch (Exception ex) { LOGGER.log(Level.SEVERE, "Error processing file: " + filename, ex); } - try { - FileInputStream inputStream = new FileInputStream(file.toFile()); - DigitalObject digitalObject = createDigitalObject(inputStream); - rebuildProcessingIndex(feeder, digitalObject); - } catch (Exception ex) { - LOGGER.log(Level.SEVERE, "Error processing file: " + filename, ex); - } - }); + } return FileVisitResult.CONTINUE; } From bca38ddfd2e6bbdf4cf85d52b302ada3bae469d4 Mon Sep 17 00:00:00 2001 From: Robert Randiak Date: Mon, 14 Aug 2023 13:10:25 +0200 Subject: [PATCH 05/10] make RebuildProcessing's unmarshaller final --- .../resourceindex/ProcessingIndexRebuild.java | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java b/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java index 8c86a7e21..f7c006075 100644 --- a/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java +++ b/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java @@ -43,22 +43,12 @@ public class ProcessingIndexRebuild { // Could be any number between 100 and 500,000. Lower the number, lower memory usage. // If it was too low, parallelization would be less effective. - // If it was too large, memory usage would slower overall execution, because of memory management. + // If it was too large, memory usage would slower overall execution, due to memory management. private static final int MAX_QUEUED_SUBMITTED_TASKS = 10000; public static final Logger LOGGER = Logger.getLogger(ProcessingIndexCheck.class.getName()); - private static Unmarshaller unmarshaller = null; - - static { - try { - JAXBContext jaxbContext = JAXBContext.newInstance(DigitalObject.class); - unmarshaller = jaxbContext.createUnmarshaller(); - } catch (Exception e) { - LOGGER.log(Level.SEVERE, "Cannot init JAXB", e); - throw new RuntimeException(e); - } - } + private static final Unmarshaller unmarshaller = initUnmarshaller(); private volatile static long counter = 0; @@ -221,4 +211,14 @@ private static void rebuildProcessingIndexImpl(AkubraObject akubraObject, InputS throw new RepositoryException(e); } } + + private static Unmarshaller initUnmarshaller() { + try { + JAXBContext jaxbContext = JAXBContext.newInstance(DigitalObject.class); + return jaxbContext.createUnmarshaller(); + } catch (Exception e) { + LOGGER.log(Level.SEVERE, "Cannot init JAXB", e); + throw new RuntimeException(e); + } + } } From fd2bea0086ef2bbfac75f493f8fb8af962ef0426 Mon Sep 17 00:00:00 2001 From: Robert Randiak Date: Mon, 14 Aug 2023 13:16:30 +0200 Subject: [PATCH 06/10] RebuildProcessing - enhance imports and comments --- .../resourceindex/ProcessingIndexRebuild.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java b/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java index f7c006075..8bccd2dfc 100644 --- a/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java +++ b/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java @@ -20,7 +20,6 @@ import cz.incad.kramerius.utils.conf.KConfiguration; import org.apache.commons.io.IOUtils; import org.apache.solr.client.solrj.SolrServerException; -import org.checkerframework.checker.units.qual.C; import org.xml.sax.SAXException; import javax.xml.bind.JAXBContext; @@ -29,10 +28,14 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; -import java.nio.file.*; +import java.nio.file.FileVisitOption; +import java.nio.file.FileVisitResult; +import java.nio.file.FileVisitor; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.nio.file.attribute.BasicFileAttributes; import java.util.Collections; -import java.util.List; import java.util.concurrent.ForkJoinPool; import java.util.logging.Level; import java.util.logging.Logger; @@ -73,7 +76,7 @@ public static void main(String[] args) throws IOException, SolrServerException, } // ForkJoinPool is used to preserve parallelization. - // The default constructor of ForkJoinPool creates a pool with parallelism \ + // The default constructor of ForkJoinPool creates a pool with parallelism // equal to Runtime.availableProcessors(), same as parallel streams. ForkJoinPool forkJoinPool = new ForkJoinPool(); From 9b92f8d11173a564420df0c65abe0ba33c85cd22 Mon Sep 17 00:00:00 2001 From: Robert Randiak Date: Mon, 14 Aug 2023 16:32:42 +0200 Subject: [PATCH 07/10] RebuildProcessing process - fix List import --- .../cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java | 1 + 1 file changed, 1 insertion(+) diff --git a/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java b/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java index 8bccd2dfc..edf36008c 100644 --- a/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java +++ b/shared/common/src/main/java/cz/incad/kramerius/resourceindex/ProcessingIndexRebuild.java @@ -36,6 +36,7 @@ import java.nio.file.Paths; import java.nio.file.attribute.BasicFileAttributes; import java.util.Collections; +import java.util.List; import java.util.concurrent.ForkJoinPool; import java.util.logging.Level; import java.util.logging.Logger; From 8172394bc991213dc95c66675f6096b308768235 Mon Sep 17 00:00:00 2001 From: vlahoda Date: Thu, 11 Jan 2024 12:34:23 +0100 Subject: [PATCH 08/10] nepovinne heslo v maileru --- .../main/java/cz/incad/kramerius/service/impl/MailerImpl.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/shared/common/src/main/java/cz/incad/kramerius/service/impl/MailerImpl.java b/shared/common/src/main/java/cz/incad/kramerius/service/impl/MailerImpl.java index a65525be5..1589410a3 100644 --- a/shared/common/src/main/java/cz/incad/kramerius/service/impl/MailerImpl.java +++ b/shared/common/src/main/java/cz/incad/kramerius/service/impl/MailerImpl.java @@ -45,7 +45,9 @@ public Session getSession(String name, String pass) { } if (pass == null) { pass = properties.getProperty("mail.smtp.pass"); - properties.put("mail.smtp.password", pass); + if (pass != null) { + properties.put("mail.smtp.password", pass); + } } Authenticator auth = new SMTPAuthenticator(name, pass); Session session = Session.getInstance(properties, auth); From 48b21b3bc5d514447f29467910236c62984cd75d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20=C5=98eh=C3=A1nek?= Date: Fri, 12 Jan 2024 15:08:16 +0100 Subject: [PATCH 09/10] kramerius access logs: store data from selected http request headers into new fields hrh_referer and hrh_kramerius_client --- .../solr-7.x_8x/logs/conf/managed-schema | 6 +- .../solr-9.x.cloud/logs/managed-schema | 6 +- .../solr-9.x/logs/conf/managed-schema | 6 +- .../statistics/accesslogs/LogRecord.java | 171 +++++++++--------- .../solr/SolrStatisticsAccessLogImpl.java | 26 +-- 5 files changed, 118 insertions(+), 97 deletions(-) diff --git a/installation/solr-7.x_8x/logs/conf/managed-schema b/installation/solr-7.x_8x/logs/conf/managed-schema index d7cde2c50..ed36cb21f 100644 --- a/installation/solr-7.x_8x/logs/conf/managed-schema +++ b/installation/solr-7.x_8x/logs/conf/managed-schema @@ -165,5 +165,9 @@ - + + + + + diff --git a/installation/solr-9.x.cloud/logs/managed-schema b/installation/solr-9.x.cloud/logs/managed-schema index d7cde2c50..ed36cb21f 100644 --- a/installation/solr-9.x.cloud/logs/managed-schema +++ b/installation/solr-9.x.cloud/logs/managed-schema @@ -165,5 +165,9 @@ - + + + + + diff --git a/installation/solr-9.x/logs/conf/managed-schema b/installation/solr-9.x/logs/conf/managed-schema index 231af4e94..f100d9a0c 100644 --- a/installation/solr-9.x/logs/conf/managed-schema +++ b/installation/solr-9.x/logs/conf/managed-schema @@ -178,5 +178,9 @@ - + + + + + diff --git a/shared/common/src/main/java/cz/incad/kramerius/statistics/accesslogs/LogRecord.java b/shared/common/src/main/java/cz/incad/kramerius/statistics/accesslogs/LogRecord.java index 7d6dd11fb..637fe0d69 100644 --- a/shared/common/src/main/java/cz/incad/kramerius/statistics/accesslogs/LogRecord.java +++ b/shared/common/src/main/java/cz/incad/kramerius/statistics/accesslogs/LogRecord.java @@ -1,18 +1,12 @@ package cz.incad.kramerius.statistics.accesslogs; -import java.util.ArrayList; -import java.util.Date; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Set; -import java.util.UUID; +import org.w3c.dom.Document; +import org.w3c.dom.Element; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; - -import org.w3c.dom.Document; -import org.w3c.dom.Element; +import java.util.*; public class LogRecord { @@ -20,7 +14,7 @@ public class LogRecord { private String id; private String pid; private Date date; - + private String ipAddress; private String user; private String requestedUrl; @@ -28,16 +22,16 @@ public class LogRecord { private String rootTitle; private String rootModel; private String rootPid; - + private String reportedAction; - + private String dbVersion; private String kramVersion; private String providedByLicense; private String evaluatedMap; private String userSessionAttributes; - + private String ownPidpath; private String ownModelPath; @@ -46,23 +40,25 @@ public class LogRecord { private Set langs = new LinkedHashSet<>(); private Set titles = new LinkedHashSet<>(); private Set solrDates = new LinkedHashSet<>(); - + private Set publishers = new LinkedHashSet<>(); private Set authors = new LinkedHashSet<>(); private Set pidsPaths = new LinkedHashSet<>(); private Set modelsPaths = new LinkedHashSet<>(); - + private Set isbns = new LinkedHashSet<>(); private Set issns = new LinkedHashSet<>(); private Set ccnbs = new LinkedHashSet<>(); - + + private Map fieldsFromHttpRequestHeaders = new LinkedHashMap<>(); + private Set details = new LinkedHashSet<>(); - + public String getId() { return id; } - + public String getPid() { return pid; } @@ -126,19 +122,19 @@ public String getKramVersion() { public void setKramVersion(String kramVersion) { this.kramVersion = kramVersion; } - + public void addLicense(String license) { this.licenses.add(license); } - + public Set getLicenses() { return licenses; } - + public void removeLicense(String license) { this.licenses.remove(license); } - + public void setLicenses(Set licenses) { this.licenses = licenses; } @@ -162,19 +158,19 @@ public void setIssueDates(Set issueDates) { public void addIssueDate(String iDate) { this.issueDates.add(iDate); } - + public void removeIssueDate(String iDate) { this.issueDates.remove(iDate); } - + public void addLang(String lang) { this.langs.add(lang); } - + public void removeLang(String lang) { this.langs.remove(lang); } - + public Set getLangs() { return langs; } @@ -194,11 +190,11 @@ public void setTitles(Set titles) { public void addTitle(String title) { this.titles.add(title); } - + public void removeTitle(String title) { this.titles.remove(title); } - + public Set getSolrDates() { return solrDates; } @@ -206,31 +202,31 @@ public Set getSolrDates() { public void setSolrDates(Set solrDates) { this.solrDates = solrDates; } - + public void addSolrDate(String sDate) { this.solrDates.add(sDate); } - + public void removeSolrDate(String sDate) { this.solrDates.remove(sDate); } - + public Set getAuthors() { return authors; } - + public void addAuthor(String author) { this.authors.add(author); } - + public void removeAuthor(String author) { this.authors.remove(author); } - + public void setAuthors(Set authors) { this.authors = authors; } - + public Set getPublishers() { return publishers; } @@ -238,15 +234,15 @@ public Set getPublishers() { public void setPublishers(Set publishers) { this.publishers = publishers; } - + public void addPublisher(String publisher) { this.publishers.add(publisher); } - + public void removePublisher(String publisher) { this.publishers.remove(publisher); } - + public String getEvaluatedMap() { return evaluatedMap; } @@ -262,7 +258,7 @@ public String getUserSessionAttributes() { public void setUserSessionAttributes(String userSessionAttributes) { this.userSessionAttributes = userSessionAttributes; } - + public String getOwnPidpath() { return ownPidpath; } @@ -286,11 +282,11 @@ public Set getPidsPaths() { public void setPidsPaths(Set pidsPaths) { this.pidsPaths = pidsPaths; } - + public Set getModelsPaths() { return modelsPaths; } - + public void setModelsPaths(Set modelsPaths) { this.modelsPaths = modelsPaths; } @@ -302,7 +298,7 @@ public Set getISBNs() { public void setISBNs(Set isbns) { this.isbns = isbns; } - + public void addISBN(String isbn) { this.isbns.add(isbn); } @@ -318,11 +314,11 @@ public void setISSNs(Set issns) { public void addISSN(String issn) { this.issns.add(issn); } - + public void removeISSN(String issn) { this.issns.add(issn); } - + public Set getCCNBs() { return ccnbs; } @@ -330,11 +326,11 @@ public Set getCCNBs() { public void setCCNBs(Set ccnbs) { this.ccnbs = ccnbs; } - + public void addCCNB(String ccnb) { this.ccnbs.add(ccnb); } - + public void remvoeCCNB(String ccnb) { this.ccnbs.remove(ccnb); } @@ -350,35 +346,39 @@ public void setReportedAction(String reportedAction) { public void addDetail(LogRecordDetail detail) { this.details.add(detail); } - + public void removeDetail(LogRecordDetail detail) { this.details.remove(detail); } - + public String getRootTitle() { return rootTitle; } - + public void setRootTitle(String rootTitle) { this.rootTitle = rootTitle; } - + public String getRootModel() { return rootModel; } - + public void setRootModel(String rootModel) { this.rootModel = rootModel; } - + public String getRootPid() { return rootPid; } - + public void setRootPid(String rootPid) { this.rootPid = rootPid; } - + + public void setFieldsFromHttpRequestHeaders(Map fieldsFromHttpRequestHeaders) { + this.fieldsFromHttpRequestHeaders = fieldsFromHttpRequestHeaders; + } + private LogRecord(String id, String pid, Date date) { super(); this.id = id; @@ -390,8 +390,6 @@ public static LogRecord buildRecord(String pid) { return new LogRecord(UUID.randomUUID().toString(), pid, new Date()); } - - public Document toSolrBatch(DocumentBuilderFactory documentFactory) throws ParserConfigurationException { DocumentBuilder document = documentFactory.newDocumentBuilder(); Document doc = document.newDocument(); @@ -399,18 +397,17 @@ public Document toSolrBatch(DocumentBuilderFactory documentFactory) throws Parse Element docElm = doc.createElement("doc"); doc.appendChild(add); add.appendChild(docElm); - + Element idField = doc.createElement("field"); idField.setAttribute("name", "id"); idField.setTextContent(this.id); docElm.appendChild(idField); - - + Element pidField = doc.createElement("field"); pidField.setAttribute("name", "pid"); pidField.setTextContent(this.pid); docElm.appendChild(pidField); - + Element ipddress = doc.createElement("field"); ipddress.setAttribute("name", "ip_address"); ipddress.setTextContent(this.ipAddress); @@ -420,14 +417,14 @@ public Document toSolrBatch(DocumentBuilderFactory documentFactory) throws Parse requestedUrl.setAttribute("name", "requested_url"); requestedUrl.setTextContent(this.requestedUrl); docElm.appendChild(requestedUrl); - + if (this.rootTitle != null) { Element rootTitleElm = doc.createElement("field"); rootTitleElm.setAttribute("name", "root_title"); rootTitleElm.setTextContent(this.rootTitle); docElm.appendChild(rootTitleElm); } - + if (this.user != null) { Element userField = doc.createElement("field"); userField.setAttribute("name", "user"); @@ -441,9 +438,11 @@ public Document toSolrBatch(DocumentBuilderFactory documentFactory) throws Parse pidPathElm.setAttribute("name", "pid_paths"); pidPathElm.setTextContent(pidPath); docElm.appendChild(pidPathElm); - + String[] split = pidPath.split("/"); - for (String pid : split) {allPids.add(pid);} + for (String pid : split) { + allPids.add(pid); + } for (String p : allPids) { Element pElm = doc.createElement("field"); pElm.setAttribute("name", "all_pids"); @@ -451,7 +450,7 @@ public Document toSolrBatch(DocumentBuilderFactory documentFactory) throws Parse docElm.appendChild(pElm); } } - + if (this.ownModelPath != null) { Set allModels = new LinkedHashSet<>(); Element oModelElm = doc.createElement("field"); @@ -460,7 +459,9 @@ public Document toSolrBatch(DocumentBuilderFactory documentFactory) throws Parse docElm.appendChild(oModelElm); String[] split = this.ownModelPath.split("/"); - for (String pid : split) {allModels.add(pid);} + for (String pid : split) { + allModels.add(pid); + } for (String m : allModels) { Element pElm = doc.createElement("field"); pElm.setAttribute("name", "all_models"); @@ -468,31 +469,29 @@ public Document toSolrBatch(DocumentBuilderFactory documentFactory) throws Parse docElm.appendChild(pElm); } } - + if (this.ownPidpath != null) { Element oModelElm = doc.createElement("field"); oModelElm.setAttribute("name", "own_pid_path"); oModelElm.setTextContent(this.ownPidpath); docElm.appendChild(oModelElm); } - - for (LogRecordDetail detail : this.details) { + for (LogRecordDetail detail : this.details) { Element dElm = doc.createElement("field"); - dElm.setAttribute("name", "pids_"+detail.getModel()); + dElm.setAttribute("name", "pids_" + detail.getModel()); dElm.setTextContent(detail.getPid()); docElm.appendChild(dElm); - + } - if (this.sessionToken != null) { Element sessionToken = doc.createElement("field"); sessionToken.setAttribute("name", "session_token"); sessionToken.setTextContent(this.sessionToken); docElm.appendChild(sessionToken); } - + if (this.evaluatedMap != null) { Element evaluatedMap = doc.createElement("field"); evaluatedMap.setAttribute("name", "evaluated_map"); @@ -506,7 +505,7 @@ public Document toSolrBatch(DocumentBuilderFactory documentFactory) throws Parse userSessionAttributes.setTextContent(this.userSessionAttributes); docElm.appendChild(userSessionAttributes); } - + Element dbVersion = doc.createElement("field"); dbVersion.setAttribute("name", "db_version"); dbVersion.setTextContent(this.dbVersion); @@ -518,21 +517,21 @@ public Document toSolrBatch(DocumentBuilderFactory documentFactory) throws Parse licenses.setTextContent(license); docElm.appendChild(licenses); } - + if (this.providedByLicense != null) { Element providedByLicense = doc.createElement("field"); providedByLicense.setAttribute("name", "provided_by_license"); providedByLicense.setTextContent(this.providedByLicense); docElm.appendChild(providedByLicense); } - + if (this.reportedAction != null) { Element reportedAction = doc.createElement("field"); reportedAction.setAttribute("name", "reported_action"); reportedAction.setTextContent(this.reportedAction); docElm.appendChild(reportedAction); } - + for (String issueDate : this.issueDates) { Element issueDateElm = doc.createElement("field"); issueDateElm.setAttribute("name", "issue_dates"); @@ -546,21 +545,20 @@ public Document toSolrBatch(DocumentBuilderFactory documentFactory) throws Parse langElm.setTextContent(lang); docElm.appendChild(langElm); } - + for (String title : this.titles) { Element titleElm = doc.createElement("field"); titleElm.setAttribute("name", "titles"); titleElm.setTextContent(title); docElm.appendChild(titleElm); } - + for (String sDate : this.solrDates) { Element sDateElm = doc.createElement("field"); sDateElm.setAttribute("name", "titles"); sDateElm.setTextContent(sDate); docElm.appendChild(sDateElm); } - for (String auth : this.authors) { Element sAuthElm = doc.createElement("field"); @@ -568,14 +566,14 @@ public Document toSolrBatch(DocumentBuilderFactory documentFactory) throws Parse sAuthElm.setTextContent(auth); docElm.appendChild(sAuthElm); } - + for (String publisher : this.publishers) { Element pElm = doc.createElement("field"); pElm.setAttribute("name", "publishers"); pElm.setTextContent(publisher); docElm.appendChild(pElm); } - + for (String isbn : this.isbns) { Element pElm = doc.createElement("field"); pElm.setAttribute("name", "id_isbn"); @@ -595,7 +593,14 @@ public Document toSolrBatch(DocumentBuilderFactory documentFactory) throws Parse pElm.setTextContent(issn); docElm.appendChild(pElm); } - + + for (String headerName : fieldsFromHttpRequestHeaders.keySet()) { + Element pElm = doc.createElement("field"); + pElm.setAttribute("name", "hrh_" + headerName.toLowerCase().replaceAll("-", "_")); + pElm.setTextContent(fieldsFromHttpRequestHeaders.get(headerName)); + docElm.appendChild(pElm); + } + return doc; } } diff --git a/shared/common/src/main/java/cz/incad/kramerius/statistics/accesslogs/solr/SolrStatisticsAccessLogImpl.java b/shared/common/src/main/java/cz/incad/kramerius/statistics/accesslogs/solr/SolrStatisticsAccessLogImpl.java index 87d6d420c..fd139ee95 100644 --- a/shared/common/src/main/java/cz/incad/kramerius/statistics/accesslogs/solr/SolrStatisticsAccessLogImpl.java +++ b/shared/common/src/main/java/cz/incad/kramerius/statistics/accesslogs/solr/SolrStatisticsAccessLogImpl.java @@ -4,10 +4,7 @@ import java.io.IOException; import java.io.InputStream; -import java.io.StringReader; -import java.sql.Connection; import java.sql.SQLException; -import java.util.ArrayList; import java.util.Arrays; import java.util.Date; import java.util.HashMap; @@ -29,7 +26,6 @@ import org.apache.http.HttpEntity; import org.apache.http.client.HttpResponseException; import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; @@ -48,7 +44,6 @@ import cz.incad.kramerius.SolrAccess; import cz.incad.kramerius.database.VersionService; import cz.incad.kramerius.pdf.utils.ModsUtils; -import cz.incad.kramerius.processes.NotReadyException; import cz.incad.kramerius.security.RightsReturnObject; import cz.incad.kramerius.security.SpecialObjects; import cz.incad.kramerius.security.User; @@ -61,15 +56,9 @@ import cz.incad.kramerius.statistics.accesslogs.LogRecord; import cz.incad.kramerius.statistics.accesslogs.LogRecordDetail; import cz.incad.kramerius.statistics.accesslogs.database.DatabaseStatisticsAccessLogImpl; -import cz.incad.kramerius.statistics.accesslogs.database.DatabaseStatisticsAccessLogImpl.InsertAuthor; -import cz.incad.kramerius.statistics.accesslogs.database.DatabaseStatisticsAccessLogImpl.InsertDetail; -import cz.incad.kramerius.statistics.accesslogs.database.DatabaseStatisticsAccessLogImpl.InsertPublisher; -import cz.incad.kramerius.statistics.accesslogs.database.DatabaseStatisticsAccessLogImpl.InsertRecord; import cz.incad.kramerius.statistics.accesslogs.utils.SElemUtils; import cz.incad.kramerius.users.LoggedUsersSingleton; import cz.incad.kramerius.utils.DCUtils; -import cz.incad.kramerius.utils.DatabaseUtils; -import cz.incad.kramerius.utils.XMLUtils; import cz.incad.kramerius.utils.conf.KConfiguration; import cz.incad.kramerius.utils.solr.SolrUpdateUtils; import cz.incad.kramerius.utils.solr.SolrUtils; @@ -200,6 +189,8 @@ public void reportAccess(final String pid, final String streamName) throws IOExc String sessionId = requestProvider.get().getSession().getId(); logRecord.setSessionToken(sessionId); } + + logRecord.setFieldsFromHttpRequestHeaders(extractFieldsFromHttpRequestHeaders()); for (int i = 0, ll = paths.length; i < ll; i++) { if (paths[i].contains(SpecialObjects.REPOSITORY.getPid())) { @@ -290,6 +281,19 @@ public void reportAccess(final String pid, final String streamName) throws IOExc } } } + + private Map extractFieldsFromHttpRequestHeaders() { + Map results = new HashMap<>(); + String referer = requestProvider.get().getHeader("Referer"); + if (referer != null) { + results.put("Referer", referer); + } + String headerKrameriusClient = requestProvider.get().getHeader("Kramerius-client"); + if (headerKrameriusClient != null) { + results.put("Kramerius-client", headerKrameriusClient); + } + return results; + } //TODO: Implement @Override From cd770c6534fe32de80e6221ded90c73e862e7ed2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20=C5=98eh=C3=A1nek?= Date: Fri, 12 Jan 2024 16:49:49 +0100 Subject: [PATCH 10/10] kramerius access logs: include hrh_referer and hrh_kramerius_client in exports --- .../cz/incad/kramerius/statistics/impl/NKPLogReport.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/shared/common/src/main/java/cz/incad/kramerius/statistics/impl/NKPLogReport.java b/shared/common/src/main/java/cz/incad/kramerius/statistics/impl/NKPLogReport.java index 6b602ab74..2c27d394c 100644 --- a/shared/common/src/main/java/cz/incad/kramerius/statistics/impl/NKPLogReport.java +++ b/shared/common/src/main/java/cz/incad/kramerius/statistics/impl/NKPLogReport.java @@ -327,7 +327,12 @@ private static Map toMap(JSONObject obj) { if (obj.has("user")) { map.put("username", obj.optString("user")); } - + if(obj.has("hrh_referer")){ + map.put("hrh_referer", obj.optString("hrh_referer")); + } + if(obj.has("hrh_kramerius_client")){ + map.put("hrh_kramerius_client", obj.optString("hrh_kramerius_client")); + } List licenses = listString(obj, "licenses"); map.put("dnnt", licenses.contains("dnntt") || licenses.contains("dnnto"));