Skip to content

Commit c3b0c92

Browse files
authored
make index database "optimization" explicit operation (#3983)
fixes #3982
1 parent 4f7f6cd commit c3b0c92

25 files changed

+142
-247
lines changed

Diff for: opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java

-10
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,6 @@ public final class Configuration {
167167
private String reviewPattern;
168168
private String webappLAF;
169169
private RemoteSCM remoteScmSupported;
170-
private boolean optimizeDatabase;
171170
private boolean quickContextScan;
172171

173172
private LuceneLockName luceneLocking = LuceneLockName.OFF;
@@ -557,7 +556,6 @@ public Configuration() {
557556
setMessageLimit(500);
558557
setNavigateWindowEnabled(false);
559558
setNestingMaximum(1);
560-
setOptimizeDatabase(true);
561559
setPluginDirectory(null);
562560
setPluginStack(new AuthorizationStack(AuthControlFlag.REQUIRED, "default stack"));
563561
setPrintProgress(false);
@@ -1088,14 +1086,6 @@ public void setRemoteScmSupported(RemoteSCM remoteScmSupported) {
10881086
this.remoteScmSupported = remoteScmSupported;
10891087
}
10901088

1091-
public boolean isOptimizeDatabase() {
1092-
return optimizeDatabase;
1093-
}
1094-
1095-
public void setOptimizeDatabase(boolean optimizeDatabase) {
1096-
this.optimizeDatabase = optimizeDatabase;
1097-
}
1098-
10991089
public LuceneLockName getLuceneLocking() {
11001090
return luceneLocking;
11011091
}

Diff for: opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java

-8
Original file line numberDiff line numberDiff line change
@@ -1085,14 +1085,6 @@ public void setRemoteScmSupported(Configuration.RemoteSCM remoteScmSupported) {
10851085
syncWriteConfiguration(remoteScmSupported, Configuration::setRemoteScmSupported);
10861086
}
10871087

1088-
public boolean isOptimizeDatabase() {
1089-
return syncReadConfiguration(Configuration::isOptimizeDatabase);
1090-
}
1091-
1092-
public void setOptimizeDatabase(boolean optimizeDatabase) {
1093-
syncWriteConfiguration(optimizeDatabase, Configuration::setOptimizeDatabase);
1094-
}
1095-
10961088
public LuceneLockName getLuceneLocking() {
10971089
return syncReadConfiguration(Configuration::getLuceneLocking);
10981090
}

Diff for: opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java

+58-109
Original file line numberDiff line numberDiff line change
@@ -160,12 +160,12 @@ public class IndexDatabase {
160160
private CopyOnWriteArrayList<IndexChangedListener> listeners;
161161
private File dirtyFile;
162162
private final Object lock = new Object();
163-
private boolean dirty;
163+
private boolean dirty; // Whether the index was modified either by adding or removing a document.
164164
private boolean running;
165165
private boolean isCountingDeltas;
166166
private boolean isWithDirectoryCounts;
167167
private List<String> directories;
168-
private LockFactory lockfact;
168+
private LockFactory lockFactory;
169169
private final BytesRef emptyBR = new BytesRef("");
170170

171171
// Directory where we store indexes
@@ -195,7 +195,7 @@ public IndexDatabase() throws IOException {
195195
public IndexDatabase(Project project, IndexDownArgsFactory factory) throws IOException {
196196
indexDownArgsFactory = factory;
197197
this.project = project;
198-
lockfact = NoLockFactory.INSTANCE;
198+
lockFactory = NoLockFactory.INSTANCE;
199199
initialize();
200200
}
201201

@@ -329,15 +329,20 @@ private void initialize() throws IOException {
329329
}
330330
}
331331

332-
lockfact = pickLockFactory(env);
333-
indexDirectory = FSDirectory.open(indexDir.toPath(), lockfact);
332+
lockFactory = pickLockFactory(env);
333+
indexDirectory = FSDirectory.open(indexDir.toPath(), lockFactory);
334334
pathAccepter = env.getPathAccepter();
335335
analyzerGuru = new AnalyzerGuru();
336336
xrefDir = new File(env.getDataRootFile(), XREF_DIR);
337337
listeners = new CopyOnWriteArrayList<>();
338338
dirtyFile = new File(indexDir, "dirty");
339339
dirty = dirtyFile.exists();
340340
directories = new ArrayList<>();
341+
342+
if (dirty) {
343+
LOGGER.log(Level.WARNING, "Index in ''{0}'' is dirty, the last indexing was likely interrupted." +
344+
" It might be worthwhile to reindex from scratch.", indexDir);
345+
}
341346
}
342347
}
343348

@@ -632,7 +637,10 @@ public void update() throws IOException {
632637
try {
633638
if (terms != null) {
634639
uidIter = terms.iterator();
635-
TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startUid)); //init uid
640+
// The seekCeil() call is important because it makes uidIter.term() become non-null.
641+
// Various indexer methods rely on this when working with the uidIter iterator - rather
642+
// than calling uidIter.next() first thing, they check uidIter.term().
643+
TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startUid));
636644
if (stat == TermsEnum.SeekStatus.END) {
637645
uidIter = null;
638646
LOGGER.log(Level.WARNING,
@@ -720,9 +728,7 @@ public void update() throws IOException {
720728
}
721729

722730
if (!isInterrupted() && isDirty()) {
723-
if (env.isOptimizeDatabase()) {
724-
optimize();
725-
}
731+
unsetDirty();
726732
env.setIndexTimestamp();
727733
}
728734
}
@@ -808,11 +814,11 @@ void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws IOExcepti
808814
}
809815

810816
/**
811-
* Optimize all index databases.
817+
* Reduce segment counts of all index databases.
812818
*
813819
* @throws IOException if an error occurs
814820
*/
815-
static CountDownLatch optimizeAll() throws IOException {
821+
static void reduceSegmentCountAll() throws IOException {
816822
List<IndexDatabase> dbs = new ArrayList<>();
817823
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
818824
IndexerParallelizer parallelizer = env.getIndexerParallelizer();
@@ -827,30 +833,35 @@ static CountDownLatch optimizeAll() throws IOException {
827833
CountDownLatch latch = new CountDownLatch(dbs.size());
828834
for (IndexDatabase d : dbs) {
829835
final IndexDatabase db = d;
830-
if (db.isDirty()) {
831-
parallelizer.getFixedExecutor().submit(() -> {
832-
try {
833-
db.update();
834-
} catch (Throwable e) {
835-
LOGGER.log(Level.SEVERE,
836-
"Problem updating lucene index database: ", e);
837-
} finally {
838-
latch.countDown();
839-
}
840-
});
841-
}
836+
parallelizer.getFixedExecutor().submit(() -> {
837+
try {
838+
db.reduceSegmentCount();
839+
} catch (Throwable e) {
840+
LOGGER.log(Level.SEVERE,
841+
"Problem reducing segment count of Lucene index database: ", e);
842+
} finally {
843+
latch.countDown();
844+
}
845+
});
846+
}
847+
848+
try {
849+
LOGGER.info("Waiting for the Lucene segment count reduction to finish");
850+
latch.await();
851+
} catch (InterruptedException exp) {
852+
LOGGER.log(Level.WARNING, "Received interrupt while waiting" +
853+
" for index segment count reduction to finish", exp);
842854
}
843-
return latch;
844855
}
845856

846857
/**
847-
* Optimize the index database.
858+
* Reduce number of segments in the index database.
848859
* @throws IOException I/O exception
849860
*/
850-
public void optimize() throws IOException {
861+
public void reduceSegmentCount() throws IOException {
851862
synchronized (lock) {
852863
if (running) {
853-
LOGGER.warning("Optimize terminated... Someone else is updating / optimizing it!");
864+
LOGGER.warning("Segment count reduction terminated... Someone else is running the operation!");
854865
return;
855866
}
856867
running = true;
@@ -861,25 +872,18 @@ public void optimize() throws IOException {
861872
try {
862873
Statistics elapsed = new Statistics();
863874
String projectDetail = this.project != null ? " for project " + project.getName() : "";
864-
LOGGER.log(Level.INFO, "Optimizing the index{0}", projectDetail);
875+
LOGGER.log(Level.INFO, "Reducing number of segments in the index{0}", projectDetail);
865876
Analyzer analyzer = new StandardAnalyzer();
866877
IndexWriterConfig conf = new IndexWriterConfig(analyzer);
867878
conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
868879

869880
wrt = new IndexWriter(indexDirectory, conf);
870-
wrt.forceMerge(1); // this is deprecated and not needed anymore
871-
elapsed.report(LOGGER, String.format("Done optimizing index%s", projectDetail),
872-
"indexer.db.optimize");
873-
synchronized (lock) {
874-
if (dirtyFile.exists() && !dirtyFile.delete()) {
875-
LOGGER.log(Level.FINE, "Failed to remove \"dirty-file\": {0}",
876-
dirtyFile.getAbsolutePath());
877-
}
878-
dirty = false;
879-
}
881+
wrt.forceMerge(1);
882+
elapsed.report(LOGGER, String.format("Done reducing number of segments in index%s", projectDetail),
883+
"indexer.db.reduceSegments");
880884
} catch (IOException e) {
881885
writerException = e;
882-
LOGGER.log(Level.SEVERE, "ERROR: optimizing index", e);
886+
LOGGER.log(Level.SEVERE, "ERROR: reducing number of segments index", e);
883887
} finally {
884888
if (wrt != null) {
885889
try {
@@ -925,6 +929,15 @@ private void setDirty() {
925929
}
926930
}
927931

932+
private void unsetDirty() {
933+
synchronized (lock) {
934+
if (dirtyFile.exists() && !dirtyFile.delete()) {
935+
LOGGER.log(Level.FINE, "Failed to remove \"dirty-file\": {0}", dirtyFile.getAbsolutePath());
936+
}
937+
dirty = false;
938+
}
939+
}
940+
928941
private File whatXrefFile(String path, boolean compress) {
929942
String xrefPath = compress ? TandemPath.join(path, ".gz") : path;
930943
return new File(xrefDir, xrefPath);
@@ -1688,6 +1701,7 @@ private void indexParallel(String dir, IndexDownArgs args) {
16881701
}
16891702
}))).get();
16901703
} catch (InterruptedException | ExecutionException e) {
1704+
interrupted = true;
16911705
int successCount = successCounter.intValue();
16921706
double successPct = 100.0 * successCount / worksCount;
16931707
String exmsg = String.format("%d successes (%.1f%%) after aborting parallel-indexing",
@@ -1793,17 +1807,11 @@ public Set<String> getFiles() throws IOException {
17931807
terms = MultiTerms.getTerms(ireader, QueryBuilder.U);
17941808
iter = terms.iterator(); // init uid iterator
17951809
}
1796-
while (iter != null && iter.term() != null) {
1797-
String value = iter.term().utf8ToString();
1798-
if (value.isEmpty()) {
1799-
iter.next();
1800-
continue;
1801-
}
1802-
1803-
files.add(Util.uid2url(value));
1804-
BytesRef next = iter.next();
1805-
if (next == null) {
1806-
iter = null;
1810+
BytesRef term;
1811+
while (iter != null && (term = iter.next()) != null) {
1812+
String value = term.utf8ToString();
1813+
if (!value.isEmpty()) {
1814+
files.add(Util.uid2url(value));
18071815
}
18081816
}
18091817
} finally {
@@ -1840,65 +1848,6 @@ public int getNumFiles() throws IOException {
18401848
}
18411849
}
18421850

1843-
static void listFrequentTokens(List<String> subFiles) throws IOException {
1844-
final int limit = 4;
1845-
1846-
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1847-
if (env.hasProjects()) {
1848-
if (subFiles == null || subFiles.isEmpty()) {
1849-
for (Project project : env.getProjectList()) {
1850-
IndexDatabase db = new IndexDatabase(project);
1851-
db.listTokens(limit);
1852-
}
1853-
} else {
1854-
for (String path : subFiles) {
1855-
Project project = Project.getProject(path);
1856-
if (project == null) {
1857-
LOGGER.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
1858-
} else {
1859-
IndexDatabase db = new IndexDatabase(project);
1860-
db.listTokens(limit);
1861-
}
1862-
}
1863-
}
1864-
} else {
1865-
IndexDatabase db = new IndexDatabase();
1866-
db.listTokens(limit);
1867-
}
1868-
}
1869-
1870-
public void listTokens(int freq) throws IOException {
1871-
IndexReader ireader = null;
1872-
TermsEnum iter = null;
1873-
Terms terms;
1874-
1875-
try {
1876-
ireader = DirectoryReader.open(indexDirectory);
1877-
if (ireader.numDocs() > 0) {
1878-
terms = MultiTerms.getTerms(ireader, QueryBuilder.DEFS);
1879-
iter = terms.iterator(); // init uid iterator
1880-
}
1881-
while (iter != null && iter.term() != null) {
1882-
if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) {
1883-
LOGGER.warning(iter.term().utf8ToString());
1884-
}
1885-
BytesRef next = iter.next();
1886-
if (next == null) {
1887-
iter = null;
1888-
}
1889-
}
1890-
} finally {
1891-
1892-
if (ireader != null) {
1893-
try {
1894-
ireader.close();
1895-
} catch (IOException e) {
1896-
LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
1897-
}
1898-
}
1899-
}
1900-
}
1901-
19021851
/**
19031852
* Get an indexReader for the Index database where a given file.
19041853
*

0 commit comments

Comments
 (0)