Skip to content

improve Index searcher efficiency #4012

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.util.logging.Logger;
import java.util.regex.PatternSyntaxException;

import org.jetbrains.annotations.Nullable;
import org.jetbrains.annotations.VisibleForTesting;
import org.opengrok.indexer.logger.LoggerFactory;
import org.opengrok.indexer.util.ClassUtil;
Expand Down Expand Up @@ -548,6 +549,7 @@ public static Project getProject(String path) {
* @param file the file to lookup
* @return the project that this file belongs to (or {@code null} if the file doesn't belong to a project)
*/
@Nullable
public static Project getProject(File file) {
Project ret = null;
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ public final class RuntimeEnvironment {
private final LazilyInstantiate<IndexSearcherFactory> lzIndexSearcherFactory;

private final Map<Project, List<RepositoryInfo>> repository_map = new ConcurrentHashMap<>();
/**
* Map of project name (or empty string in case of project-less configuration) to SearcherManager object.
*/
private final Map<String, SearcherManager> searcherManagerMap = new ConcurrentHashMap<>();

private String configURI;
Expand Down Expand Up @@ -151,8 +154,7 @@ public List<String> getSubFiles() {
private final Map<String, FileCollector> fileCollectorMap = new HashMap<>();

/**
* Creates a new instance of RuntimeEnvironment. Private to ensure a
* singleton anti-pattern.
* Creates a new instance of RuntimeEnvironment. Private to ensure a singleton anti-pattern.
*/
private RuntimeEnvironment() {
configuration = new Configuration();
Expand Down Expand Up @@ -438,20 +440,18 @@ public void setSourceRoot(String sourceRoot) {
}

/**
* Returns a path relative to source root. This would just be a simple
* substring operation, except we need to support symlinks outside the
* source root.
* Returns a path relative to source root. This would just be a simple substring operation,
* except we need to support symlinks outside the source root.
*
* @param file A file to resolve
* @return Path relative to source root
* @throws IOException If an IO error occurs
* @throws FileNotFoundException if the file is not relative to source root
* or if {@code sourceRoot} is not defined
* @throws ForbiddenSymlinkException if symbolic-link checking encounters
* an ineligible link
* @throws ForbiddenSymlinkException if symbolic-link checking encounters an ineligible link
*/
public String getPathRelativeToSourceRoot(File file)
throws IOException, ForbiddenSymlinkException {
public String getPathRelativeToSourceRoot(File file) throws IOException, ForbiddenSymlinkException {

String sourceRoot = getSourceRootPath();
if (sourceRoot == null) {
throw new FileNotFoundException("sourceRoot is not defined");
Expand Down Expand Up @@ -1809,27 +1809,51 @@ public void maybeRefreshIndexSearchers() {
}

/**
* Get IndexSearcher for given project.
* Get IndexSearcher for given project or global IndexSearcher.
* Wrapper of {@link #getSuperIndexSearcher(String)}. Make sure to release the returned
* {@link SuperIndexSearcher} instance.
* @param file file object
* @return SuperIndexSearcher instance
* @throws IOException on error when reading
*/
public SuperIndexSearcher getSuperIndexSearcher(File file) throws IOException {
    // Project-less configuration: the searcher is looked up under the empty name.
    if (!RuntimeEnvironment.getInstance().hasProjects()) {
        return getSuperIndexSearcher("");
    }

    // With projects enabled the file has to belong to one of them.
    Project project = Project.getProject(file);
    if (project == null) {
        throw new IOException(String.format("project for '%s' not found", file));
    }

    return getSuperIndexSearcher(project.getName());
}

/**
* Get IndexSearcher for given project or global IndexSearcher.
* Each IndexSearcher is born from a SearcherManager object. There is one SearcherManager for every project.
* This schema makes it possible to reuse IndexSearcher/IndexReader objects so the heavy lifting
* (esp. system calls) performed in {@code FSDirectory} and {@code DirectoryReader} happens only once for a project.
* (esp. system calls) performed in {@code FSDirectory} and {@code DirectoryReader} happens only once
* for given index.
* <p>
* The caller has to make sure that the IndexSearcher is returned to the SearcherManager.
* This is done with {@code searcherManagerInstance.release(indexSearcherInstance);}
* The return of the IndexSearcher should happen only after the search result data are read fully.
*
* @param projectName project
* @return SearcherManager for given project
* @param searcherName project name or empty string for project-less configuration
* @return SuperIndexSearcher instance
* @throws IOException I/O exception
*/
@SuppressWarnings("java:S2095")
public SuperIndexSearcher getIndexSearcher(String projectName) throws IOException {
public SuperIndexSearcher getSuperIndexSearcher(String searcherName) throws IOException {

SearcherManager mgr = searcherManagerMap.get(projectName);
SearcherManager mgr = searcherManagerMap.get(searcherName);
if (mgr == null) {
File indexDir = new File(getDataRootPath(), IndexDatabase.INDEX_DIR);
Directory dir = FSDirectory.open(new File(indexDir, projectName).toPath());
Directory dir = FSDirectory.open(new File(indexDir, searcherName).toPath());
mgr = new SearcherManager(dir, getSuperIndexSearcherFactory());
searcherManagerMap.put(projectName, mgr);
searcherManagerMap.put(searcherName, mgr);
}

SuperIndexSearcher searcher = (SuperIndexSearcher) mgr.acquire();
Expand Down Expand Up @@ -1868,7 +1892,7 @@ public void refreshSearcherManagerMap() {

/**
* Return collection of IndexReader objects as MultiReader object for given list of projects.
* The caller is responsible for releasing the IndexSearcher objects.
* The caller is responsible for releasing the {@link SuperIndexSearcher} objects.
*
* @param projects list of projects
* @param searcherList each SuperIndexSearcher produced will be put into this list
Expand All @@ -1882,7 +1906,7 @@ public MultiReader getMultiReader(SortedSet<String> projects, List<SuperIndexSea
// TODO might need to rewrite to Project instead of String, need changes in projects.jspf too.
for (String proj : projects) {
try {
SuperIndexSearcher searcher = getIndexSearcher(proj);
SuperIndexSearcher searcher = getSuperIndexSearcher(proj);
subreaders[ii++] = searcher.getIndexReader();
searcherList.add(searcher);
} catch (IOException | NullPointerException ex) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
*/
package org.opengrok.indexer.configuration;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
Expand Down Expand Up @@ -51,4 +52,8 @@ public void setSearcherManager(SearcherManager s) {
/**
 * @return the {@code SearcherManager} currently stored in this searcher
 * (presumably {@code null} until {@code setSearcherManager()} was called - TODO confirm)
 */
public SearcherManager getSearcherManager() {
    return searcherManager;
}

/**
 * Hand this searcher back to the {@code SearcherManager} returned by {@link #getSearcherManager()}.
 * @throws IOException propagated from the manager's {@code release()} call
 */
public void release() throws IOException {
    final SearcherManager manager = getSearcherManager();
    manager.release(this);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

/**
* Factory for producing IndexSearcher objects.
* This is used inside getIndexSearcher() to produce new SearcherManager objects
* This is used inside getSuperIndexSearcher() to produce new SearcherManager objects
* to make sure the searcher threads are constrained to a single thread pool.
* @author vkotal
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,16 @@

import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.opengrok.indexer.analysis.CompatibleAnalyser;
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.index.IndexDatabase;
import org.opengrok.indexer.configuration.SuperIndexSearcher;
import org.opengrok.indexer.logger.LoggerFactory;
import org.opengrok.indexer.search.QueryBuilder;

Expand Down Expand Up @@ -93,17 +91,12 @@ public class DirectoryHistoryReader {
* @throws IOException when index cannot be accessed
*/
public DirectoryHistoryReader(String path) throws IOException {
IndexReader indexReader = null;
SuperIndexSearcher searcher = null;
try {
// Prepare for index search.
String srcRoot = RuntimeEnvironment.getInstance().getSourceRootPath();
indexReader = IndexDatabase.getIndexReader(path);
if (indexReader == null) {
throw new IOException(String.format("Could not locate index database for '%s'", path));
}
// The search results will be sorted by date.
IndexSearcher searcher = RuntimeEnvironment.getInstance().
getIndexSearcherFactory().newSearcher(indexReader);
searcher = RuntimeEnvironment.getInstance().getSuperIndexSearcher(new File(srcRoot, path));
SortField sfield = new SortField(QueryBuilder.DATE, SortField.Type.STRING, true);
Sort sort = new Sort(sfield);
QueryParser qparser = new QueryParser(QueryBuilder.PATH, new CompatibleAnalyser());
Expand Down Expand Up @@ -166,9 +159,9 @@ public DirectoryHistoryReader(String path) throws IOException {
// into history object.
history = new History(entries);
} finally {
if (indexReader != null) {
if (searcher != null) {
try {
indexReader.close();
searcher.release();
} catch (Exception ex) {
LOGGER.log(Level.WARNING,
String.format("An error occurred while closing index reader for '%s'", path), ex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.AlreadyClosedException;
Expand All @@ -99,6 +98,7 @@
import org.opengrok.indexer.configuration.PathAccepter;
import org.opengrok.indexer.configuration.Project;
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.configuration.SuperIndexSearcher;
import org.opengrok.indexer.history.FileCollector;
import org.opengrok.indexer.history.HistoryGuru;
import org.opengrok.indexer.history.Repository;
Expand Down Expand Up @@ -1848,45 +1848,11 @@ public int getNumFiles() throws IOException {
}
}

/**
 * Get an {@link IndexReader} for the index database that should contain the given file.
 * <p>
 * When projects are enabled, the index directory of the project owning {@code path} is used;
 * otherwise the top-level index directory under data root is used.
 * The caller is responsible for closing the returned reader.
 *
 * @param path the file to get the database for
 * @return reader of the index database where the file should be located
 * or {@code null} if it cannot be located or opened.
 */
@SuppressWarnings("java:S2095")
@Nullable
public static IndexReader getIndexReader(String path) {
    RuntimeEnvironment env = RuntimeEnvironment.getInstance();
    File indexDir = new File(env.getDataRootFile(), INDEX_DIR);

    if (env.hasProjects()) {
        Project p = Project.getProject(path);
        if (p == null) {
            // The path does not belong to any project, hence there is no index to open.
            return null;
        }
        indexDir = new File(indexDir, p.getPath());
    }

    IndexReader ret = null;
    FSDirectory fdir = null;
    try {
        fdir = FSDirectory.open(indexDir.toPath(), NoLockFactory.INSTANCE);
        if (indexDir.exists() && DirectoryReader.indexExists(fdir)) {
            ret = DirectoryReader.open(fdir);
        }
    } catch (Exception ex) {
        LOGGER.log(Level.SEVERE, "Failed to open index: {0}", indexDir.getAbsolutePath());
        LOGGER.log(Level.FINE, "Stack Trace: ", ex);
    } finally {
        // Without a reader nobody else holds on to the directory, so close it here
        // instead of leaking it. When a reader is returned, the directory is left open for it.
        if (ret == null && fdir != null) {
            try {
                fdir.close();
            } catch (IOException ex) {
                LOGGER.log(Level.FINE, "Failed to close index directory: ", ex);
            }
        }
    }
    return ret;
}

/**
* Get the latest definitions for a file from the index.
*
* @param file the file whose definitions to find
* @return definitions for the file, or {@code null} if they could not be
* found
* @return definitions for the file, or {@code null} if they could not be found
* @throws IOException if an error happens when accessing the index
* @throws ParseException if an error happens when building the Lucene query
* @throws ClassNotFoundException if the class for the stored definitions
Expand All @@ -1909,11 +1875,13 @@ public static Definitions getDefinitions(File file) throws ParseException, IOExc

/**
* @param file File object for a file under source root
* @return Document object for the file or {@code null}
* @return Document object for the file or {@code null} if no document was found
* @throws IOException on I/O error
* @throws ParseException on problem with building Query
*/
public static Document getDocument(File file) throws IOException, ParseException {
@Nullable
public static Document getDocument(File file) throws ParseException, IOException {

RuntimeEnvironment env = RuntimeEnvironment.getInstance();
String path;
try {
Expand All @@ -1925,36 +1893,28 @@ public static Document getDocument(File file) throws IOException, ParseException
// Sanitize Windows path delimiters in order not to conflict with Lucene escape character.
path = path.replace("\\", "/");

try (IndexReader indexReader = getIndexReader(path)) {
return getDocument(path, indexReader);
}
}

@Nullable
private static Document getDocument(String path, IndexReader indexReader) throws ParseException, IOException {
if (indexReader == null) {
// No index, no document..
return null;
}

Document doc;
Query q = new QueryBuilder().setPath(path).build();
IndexSearcher searcher = RuntimeEnvironment.getInstance().getIndexSearcherFactory().newSearcher(indexReader);
Statistics stat = new Statistics();
TopDocs top = searcher.search(q, 1);
stat.report(LOGGER, Level.FINEST, "search via getDocument() done",
"search.latency", new String[]{"category", "getdocument",
"outcome", top.totalHits.value == 0 ? "empty" : "success"});
if (top.totalHits.value == 0) {
// No hits, no document...
return null;
}
doc = searcher.doc(top.scoreDocs[0].doc);
String foundPath = doc.get(QueryBuilder.PATH);
SuperIndexSearcher searcher = env.getSuperIndexSearcher(file);
try {
Statistics stat = new Statistics();
TopDocs top = searcher.search(q, 1);
stat.report(LOGGER, Level.FINEST, "search via getDocument() done",
"search.latency", new String[]{"category", "getdocument",
"outcome", top.totalHits.value == 0 ? "empty" : "success"});
if (top.totalHits.value == 0) {
// No hits, no document...
return null;
}
doc = searcher.doc(top.scoreDocs[0].doc);
String foundPath = doc.get(QueryBuilder.PATH);

// Only use the document if we found an exact match.
if (!path.equals(foundPath)) {
return null;
// Only use the document if we found an exact match.
if (!path.equals(foundPath)) {
return null;
}
} finally {
searcher.release();
}

return doc;
Expand Down
Loading