pavel-stastny committed Jan 18, 2024
2 parents 57ce932 + 359347d commit 599d410
Showing 8 changed files with 224 additions and 118 deletions.
6 changes: 5 additions & 1 deletion installation/solr-7.x_8x/logs/conf/managed-schema
@@ -165,5 +165,9 @@
<copyField source="id_isbn" dest="all_identifiers"/>
<copyField source="id_issn" dest="all_identifiers"/>
<copyField source="id_ccnb" dest="all_identifiers"/>


<!-- http request headers -->
<field name="hrh_referer" type="string" indexed="true" stored="true" multiValued="false" docValues="true"/>
<field name="hrh_kramerius_client" type="string" indexed="true" stored="true" multiValued="false" docValues="true"/>

</schema>
6 changes: 5 additions & 1 deletion installation/solr-9.x.cloud/logs/managed-schema
@@ -165,5 +165,9 @@
<copyField source="id_isbn" dest="all_identifiers"/>
<copyField source="id_issn" dest="all_identifiers"/>
<copyField source="id_ccnb" dest="all_identifiers"/>


<!-- http request headers -->
<field name="hrh_referer" type="string" indexed="true" stored="true" multiValued="false" docValues="true"/>
<field name="hrh_kramerius_client" type="string" indexed="true" stored="true" multiValued="false" docValues="true"/>

</schema>
6 changes: 5 additions & 1 deletion installation/solr-9.x/logs/conf/managed-schema
@@ -178,5 +178,9 @@
<copyField source="id_isbn" dest="all_identifiers"/>
<copyField source="id_issn" dest="all_identifiers"/>
<copyField source="id_ccnb" dest="all_identifiers"/>


<!-- http request headers -->
<field name="hrh_referer" type="string" indexed="true" stored="true" multiValued="false" docValues="true"/>
<field name="hrh_kramerius_client" type="string" indexed="true" stored="true" multiValued="false" docValues="true"/>

</schema>
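
For context, the two new header fields are plain single-valued strings, so a log document can populate them like any other field. A minimal SolrJ sketch follows; the Solr URL and the collection name "logs" are assumptions for illustration, not taken from this commit:

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.Http2SolrClient;
import org.apache.solr.common.SolrInputDocument;

public class LogFieldExample {
    public static void main(String[] args) throws Exception {
        // Assumed endpoint and collection name -- adjust to the actual deployment.
        try (SolrClient client = new Http2SolrClient.Builder("http://localhost:8983/solr").build()) {
            SolrInputDocument doc = new SolrInputDocument();
            doc.addField("id", "example-log-entry-1");
            // The two fields added by this commit: single-valued strings with docValues.
            doc.addField("hrh_referer", "https://example.org/some-page");
            doc.addField("hrh_kramerius_client", "example-client");
            client.add("logs", doc);
            client.commit("logs");
        }
    }
}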
@@ -29,30 +29,30 @@
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.FileVisitOption;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ForkJoinPool;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* The process declaration is in shared/common/src/main/java/cz/incad/kramerius/processes/res/lp.st (processing_rebuild)
*/
public class ProcessingIndexRebuild {
public static final Logger LOGGER = Logger.getLogger(ProcessingIndexCheck.class.getName());
// Could be any number between 100 and 500,000. The lower the number, the lower the memory usage.
// If it were too low, parallelization would be less effective.
// If it were too large, memory-management overhead would slow down overall execution.
private static final int MAX_QUEUED_SUBMITTED_TASKS = 10000;

private static Unmarshaller unmarshaller = null;
public static final Logger LOGGER = Logger.getLogger(ProcessingIndexCheck.class.getName());

static {
try {
JAXBContext jaxbContext = JAXBContext.newInstance(DigitalObject.class);
unmarshaller = jaxbContext.createUnmarshaller();
} catch (Exception e) {
LOGGER.log(Level.SEVERE, "Cannot init JAXB", e);
throw new RuntimeException(e);
}
}
private static final Unmarshaller unmarshaller = initUnmarshaller();

private volatile static long counter = 0;

@@ -75,16 +75,84 @@ public static void main(String[] args) throws IOException, SolrServerException,
} else {
objectStoreRoot = Paths.get(KConfiguration.getInstance().getProperty("objectStore.path"));
}
Files.walk(objectStoreRoot, FileVisitOption.FOLLOW_LINKS).parallel().filter(Files::isRegularFile).forEach(path -> {
String filename = path.toString();
try {
FileInputStream inputStream = new FileInputStream(path.toFile());
DigitalObject digitalObject = createDigitalObject(inputStream);
rebuildProcessingIndex(feeder, digitalObject);
} catch (Exception ex) {
LOGGER.log(Level.SEVERE, "Error processing file: " + filename, ex);

// ForkJoinPool is used to preserve parallelism.
// The default ForkJoinPool constructor creates a pool with parallelism
// equal to Runtime.getRuntime().availableProcessors(), the same as parallel streams.
ForkJoinPool forkJoinPool = new ForkJoinPool();

// Files.walkFileTree() is used because it does not accumulate Paths in memory,
// which makes it more memory-efficient than Files.walk().
Files.walkFileTree(objectStoreRoot,
Collections.singleton(FileVisitOption.FOLLOW_LINKS),
Integer.MAX_VALUE,
new FileVisitor<Path>() {
@Override
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
return FileVisitResult.CONTINUE;
}

@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
if (!Files.isRegularFile(file)) {
return FileVisitResult.CONTINUE;
}

if (forkJoinPool.getQueuedSubmissionCount() < MAX_QUEUED_SUBMITTED_TASKS) {
forkJoinPool.execute(() -> {
String filename = file.toString();
try (FileInputStream inputStream = new FileInputStream(file.toFile())) {
DigitalObject digitalObject = createDigitalObject(inputStream);
rebuildProcessingIndex(feeder, digitalObject);
} catch (Exception ex) {
LOGGER.log(Level.SEVERE, "Error processing file: " + filename, ex);
}
});
} else {
String filename = file.toString();
try (FileInputStream inputStream = new FileInputStream(file.toFile())) {
DigitalObject digitalObject = createDigitalObject(inputStream);
rebuildProcessingIndex(feeder, digitalObject);
} catch (Exception ex) {
LOGGER.log(Level.SEVERE, "Error processing file: " + filename, ex);
}
}

return FileVisitResult.CONTINUE;
}

@Override
public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
LOGGER.log(Level.SEVERE, "Error processing file: " + file.toString(), exc);

// This allows execution to continue uninterrupted,
// even when permission errors are encountered.
return FileVisitResult.CONTINUE;
}

@Override
public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
if (exc != null) {
LOGGER.log(Level.SEVERE, "Error searching directory: " + dir.toString(), exc);
}

// This allows execution to continue uninterrupted,
// even when permission errors are encountered.
return FileVisitResult.CONTINUE;
}
});

// Files.walk(objectStoreRoot, FileVisitOption.FOLLOW_LINKS).parallel().filter(Files::isRegularFile).forEach(path -> {
// String filename = path.toString();
// try {
// FileInputStream inputStream = new FileInputStream(path.toFile());
// DigitalObject digitalObject = createDigitalObject(inputStream);
// rebuildProcessingIndex(feeder, digitalObject);
// } catch (Exception ex) {
// LOGGER.log(Level.SEVERE, "Error processing file: " + filename, ex);
// }
// });

LOGGER.info("Finished tree walk in " + (System.currentTimeMillis() - start) + " ms");

fa.shutdown();
@@ -147,4 +147,14 @@ private static void rebuildProcessingIndexImpl(AkubraObject akubraObject, InputS
throw new RepositoryException(e);
}
}

private static Unmarshaller initUnmarshaller() {
try {
JAXBContext jaxbContext = JAXBContext.newInstance(DigitalObject.class);
return jaxbContext.createUnmarshaller();
} catch (Exception e) {
LOGGER.log(Level.SEVERE, "Cannot init JAXB", e);
throw new RuntimeException(e);
}
}
}
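
The comments above describe the key idea of the rewrite: a file is handed to the ForkJoinPool only while its submission queue stays below MAX_QUEUED_SUBMITTED_TASKS; otherwise it is processed inline on the visiting thread, which throttles memory use without stopping the walk. A condensed, hedged sketch of that pattern in isolation (the directory path and the task body are placeholders, not the project's code):

import java.io.IOException;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Collections;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.TimeUnit;

public class BoundedWalkSketch {
    private static final int MAX_QUEUED_SUBMITTED_TASKS = 10_000;

    public static void main(String[] args) throws IOException {
        ForkJoinPool pool = new ForkJoinPool(); // parallelism = available processors
        Files.walkFileTree(Paths.get("/some/object/store"),            // placeholder path
                Collections.singleton(FileVisitOption.FOLLOW_LINKS),
                Integer.MAX_VALUE,
                new SimpleFileVisitor<Path>() {
                    @Override
                    public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
                        if (pool.getQueuedSubmissionCount() < MAX_QUEUED_SUBMITTED_TASKS) {
                            pool.execute(() -> process(file));  // async while the queue is short
                        } else {
                            process(file);                      // back-pressure: do the work inline
                        }
                        return FileVisitResult.CONTINUE;
                    }
                });
        // Let queued tasks finish before reporting completion.
        pool.awaitQuiescence(Long.MAX_VALUE, TimeUnit.DAYS);
    }

    private static void process(Path file) {
        // Placeholder for the real work (e.g. parsing FOXML and feeding the processing index).
        System.out.println("processed " + file);
    }
}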
@@ -45,7 +45,9 @@ public Session getSession(String name, String pass) {
}
if (pass == null) {
pass = properties.getProperty("mail.smtp.pass");
properties.put("mail.smtp.password", pass);
if (pass != null) {
properties.put("mail.smtp.password", pass);
}
}
Authenticator auth = new SMTPAuthenticator(name, pass);
Session session = Session.getInstance(properties, auth);
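
For reference, SMTPAuthenticator itself is not shown in this diff; in JavaMail such an authenticator is typically a thin wrapper around PasswordAuthentication. A hypothetical sketch of what it may look like (the class body here is an assumption, not the project's actual implementation):

import javax.mail.Authenticator;
import javax.mail.PasswordAuthentication;

// Hypothetical sketch -- the real SMTPAuthenticator is defined elsewhere in the project.
class SMTPAuthenticator extends Authenticator {
    private final String name;
    private final String pass;

    SMTPAuthenticator(String name, String pass) {
        this.name = name;
        this.pass = pass;
    }

    @Override
    protected PasswordAuthentication getPasswordAuthentication() {
        return new PasswordAuthentication(name, pass);
    }
}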