@@ -160,12 +160,12 @@ public class IndexDatabase {
160
160
private CopyOnWriteArrayList <IndexChangedListener > listeners ;
161
161
private File dirtyFile ;
162
162
private final Object lock = new Object ();
163
- private boolean dirty ;
163
+ private boolean dirty ; // Whether the index was modified either by adding or removing a document.
164
164
private boolean running ;
165
165
private boolean isCountingDeltas ;
166
166
private boolean isWithDirectoryCounts ;
167
167
private List <String > directories ;
168
- private LockFactory lockfact ;
168
+ private LockFactory lockFactory ;
169
169
private final BytesRef emptyBR = new BytesRef ("" );
170
170
171
171
// Directory where we store indexes
@@ -195,7 +195,7 @@ public IndexDatabase() throws IOException {
195
195
public IndexDatabase (Project project , IndexDownArgsFactory factory ) throws IOException {
196
196
indexDownArgsFactory = factory ;
197
197
this .project = project ;
198
- lockfact = NoLockFactory .INSTANCE ;
198
+ lockFactory = NoLockFactory .INSTANCE ;
199
199
initialize ();
200
200
}
201
201
@@ -329,15 +329,20 @@ private void initialize() throws IOException {
329
329
}
330
330
}
331
331
332
- lockfact = pickLockFactory (env );
333
- indexDirectory = FSDirectory .open (indexDir .toPath (), lockfact );
332
+ lockFactory = pickLockFactory (env );
333
+ indexDirectory = FSDirectory .open (indexDir .toPath (), lockFactory );
334
334
pathAccepter = env .getPathAccepter ();
335
335
analyzerGuru = new AnalyzerGuru ();
336
336
xrefDir = new File (env .getDataRootFile (), XREF_DIR );
337
337
listeners = new CopyOnWriteArrayList <>();
338
338
dirtyFile = new File (indexDir , "dirty" );
339
339
dirty = dirtyFile .exists ();
340
340
directories = new ArrayList <>();
341
+
342
+ if (dirty ) {
343
+ LOGGER .log (Level .WARNING , "Index in ''{0}'' is dirty, the last indexing was likely interrupted." +
344
+ " It might be worthwhile to reindex from scratch." , indexDir );
345
+ }
341
346
}
342
347
}
343
348
@@ -632,7 +637,10 @@ public void update() throws IOException {
632
637
try {
633
638
if (terms != null ) {
634
639
uidIter = terms .iterator ();
635
- TermsEnum .SeekStatus stat = uidIter .seekCeil (new BytesRef (startUid )); //init uid
640
+ // The seekCeil() call is important because it makes uidIter.term() non-null.
641
+ // Various indexer methods rely on this when working with the uidIter iterator - rather
642
+ // than calling uidIter.next() first thing, they check uidIter.term().
643
+ TermsEnum .SeekStatus stat = uidIter .seekCeil (new BytesRef (startUid ));
636
644
if (stat == TermsEnum .SeekStatus .END ) {
637
645
uidIter = null ;
638
646
LOGGER .log (Level .WARNING ,
@@ -720,9 +728,7 @@ public void update() throws IOException {
720
728
}
721
729
722
730
if (!isInterrupted () && isDirty ()) {
723
- if (env .isOptimizeDatabase ()) {
724
- optimize ();
725
- }
731
+ unsetDirty ();
726
732
env .setIndexTimestamp ();
727
733
}
728
734
}
@@ -808,11 +814,11 @@ void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws IOExcepti
808
814
}
809
815
810
816
/**
811
- * Optimize all index databases.
817
+ * Reduce segment counts of all index databases.
812
818
*
813
819
* @throws IOException if an error occurs
814
820
*/
815
- static CountDownLatch optimizeAll () throws IOException {
821
+ static void reduceSegmentCountAll () throws IOException {
816
822
List <IndexDatabase > dbs = new ArrayList <>();
817
823
RuntimeEnvironment env = RuntimeEnvironment .getInstance ();
818
824
IndexerParallelizer parallelizer = env .getIndexerParallelizer ();
@@ -827,30 +833,35 @@ static CountDownLatch optimizeAll() throws IOException {
827
833
CountDownLatch latch = new CountDownLatch (dbs .size ());
828
834
for (IndexDatabase d : dbs ) {
829
835
final IndexDatabase db = d ;
830
- if (db .isDirty ()) {
831
- parallelizer .getFixedExecutor ().submit (() -> {
832
- try {
833
- db .update ();
834
- } catch (Throwable e ) {
835
- LOGGER .log (Level .SEVERE ,
836
- "Problem updating lucene index database: " , e );
837
- } finally {
838
- latch .countDown ();
839
- }
840
- });
841
- }
836
+ parallelizer .getFixedExecutor ().submit (() -> {
837
+ try {
838
+ db .reduceSegmentCount ();
839
+ } catch (Throwable e ) {
840
+ LOGGER .log (Level .SEVERE ,
841
+ "Problem reducing segment count of Lucene index database: " , e );
842
+ } finally {
843
+ latch .countDown ();
844
+ }
845
+ });
846
+ }
847
+
848
+ try {
849
+ LOGGER .info ("Waiting for the Lucene segment count reduction to finish" );
850
+ latch .await ();
851
+ } catch (InterruptedException exp ) {
852
+ LOGGER .log (Level .WARNING , "Received interrupt while waiting" +
853
+ " for index segment count reduction to finish" , exp );
842
854
}
843
- return latch ;
844
855
}
845
856
846
857
/**
847
- * Optimize the index database.
858
+ * Reduce the number of segments in the index database.
848
859
* @throws IOException I/O exception
849
860
*/
850
- public void optimize () throws IOException {
861
+ public void reduceSegmentCount () throws IOException {
851
862
synchronized (lock ) {
852
863
if (running ) {
853
- LOGGER .warning ("Optimize terminated... Someone else is updating / optimizing it !" );
864
+ LOGGER .warning ("Segment count reduction terminated... Someone else is running the operation !" );
854
865
return ;
855
866
}
856
867
running = true ;
@@ -861,25 +872,18 @@ public void optimize() throws IOException {
861
872
try {
862
873
Statistics elapsed = new Statistics ();
863
874
String projectDetail = this .project != null ? " for project " + project .getName () : "" ;
864
- LOGGER .log (Level .INFO , "Optimizing the index{0}" , projectDetail );
875
+ LOGGER .log (Level .INFO , "Reducing number of segments in the index{0}" , projectDetail );
865
876
Analyzer analyzer = new StandardAnalyzer ();
866
877
IndexWriterConfig conf = new IndexWriterConfig (analyzer );
867
878
conf .setOpenMode (OpenMode .CREATE_OR_APPEND );
868
879
869
880
wrt = new IndexWriter (indexDirectory , conf );
870
- wrt .forceMerge (1 ); // this is deprecated and not needed anymore
871
- elapsed .report (LOGGER , String .format ("Done optimizing index%s" , projectDetail ),
872
- "indexer.db.optimize" );
873
- synchronized (lock ) {
874
- if (dirtyFile .exists () && !dirtyFile .delete ()) {
875
- LOGGER .log (Level .FINE , "Failed to remove \" dirty-file\" : {0}" ,
876
- dirtyFile .getAbsolutePath ());
877
- }
878
- dirty = false ;
879
- }
881
+ wrt .forceMerge (1 );
882
+ elapsed .report (LOGGER , String .format ("Done reducing number of segments in index%s" , projectDetail ),
883
+ "indexer.db.reduceSegments" );
880
884
} catch (IOException e ) {
881
885
writerException = e ;
882
- LOGGER .log (Level .SEVERE , "ERROR: optimizing index" , e );
886
+ LOGGER .log (Level .SEVERE , "ERROR: reducing number of segments index" , e );
883
887
} finally {
884
888
if (wrt != null ) {
885
889
try {
@@ -925,6 +929,15 @@ private void setDirty() {
925
929
}
926
930
}
927
931
932
+ private void unsetDirty () {
933
+ synchronized (lock ) {
934
+ if (dirtyFile .exists () && !dirtyFile .delete ()) {
935
+ LOGGER .log (Level .FINE , "Failed to remove \" dirty-file\" : {0}" , dirtyFile .getAbsolutePath ());
936
+ }
937
+ dirty = false ;
938
+ }
939
+ }
940
+
928
941
private File whatXrefFile (String path , boolean compress ) {
929
942
String xrefPath = compress ? TandemPath .join (path , ".gz" ) : path ;
930
943
return new File (xrefDir , xrefPath );
@@ -1688,6 +1701,7 @@ private void indexParallel(String dir, IndexDownArgs args) {
1688
1701
}
1689
1702
}))).get ();
1690
1703
} catch (InterruptedException | ExecutionException e ) {
1704
+ interrupted = true ;
1691
1705
int successCount = successCounter .intValue ();
1692
1706
double successPct = 100.0 * successCount / worksCount ;
1693
1707
String exmsg = String .format ("%d successes (%.1f%%) after aborting parallel-indexing" ,
@@ -1793,17 +1807,11 @@ public Set<String> getFiles() throws IOException {
1793
1807
terms = MultiTerms .getTerms (ireader , QueryBuilder .U );
1794
1808
iter = terms .iterator (); // init uid iterator
1795
1809
}
1796
- while (iter != null && iter .term () != null ) {
1797
- String value = iter .term ().utf8ToString ();
1798
- if (value .isEmpty ()) {
1799
- iter .next ();
1800
- continue ;
1801
- }
1802
-
1803
- files .add (Util .uid2url (value ));
1804
- BytesRef next = iter .next ();
1805
- if (next == null ) {
1806
- iter = null ;
1810
+ BytesRef term ;
1811
+ while (iter != null && (term = iter .next ()) != null ) {
1812
+ String value = term .utf8ToString ();
1813
+ if (!value .isEmpty ()) {
1814
+ files .add (Util .uid2url (value ));
1807
1815
}
1808
1816
}
1809
1817
} finally {
@@ -1840,65 +1848,6 @@ public int getNumFiles() throws IOException {
1840
1848
}
1841
1849
}
1842
1850
1843
- static void listFrequentTokens (List <String > subFiles ) throws IOException {
1844
- final int limit = 4 ;
1845
-
1846
- RuntimeEnvironment env = RuntimeEnvironment .getInstance ();
1847
- if (env .hasProjects ()) {
1848
- if (subFiles == null || subFiles .isEmpty ()) {
1849
- for (Project project : env .getProjectList ()) {
1850
- IndexDatabase db = new IndexDatabase (project );
1851
- db .listTokens (limit );
1852
- }
1853
- } else {
1854
- for (String path : subFiles ) {
1855
- Project project = Project .getProject (path );
1856
- if (project == null ) {
1857
- LOGGER .log (Level .WARNING , "Could not find a project for \" {0}\" " , path );
1858
- } else {
1859
- IndexDatabase db = new IndexDatabase (project );
1860
- db .listTokens (limit );
1861
- }
1862
- }
1863
- }
1864
- } else {
1865
- IndexDatabase db = new IndexDatabase ();
1866
- db .listTokens (limit );
1867
- }
1868
- }
1869
-
1870
- public void listTokens (int freq ) throws IOException {
1871
- IndexReader ireader = null ;
1872
- TermsEnum iter = null ;
1873
- Terms terms ;
1874
-
1875
- try {
1876
- ireader = DirectoryReader .open (indexDirectory );
1877
- if (ireader .numDocs () > 0 ) {
1878
- terms = MultiTerms .getTerms (ireader , QueryBuilder .DEFS );
1879
- iter = terms .iterator (); // init uid iterator
1880
- }
1881
- while (iter != null && iter .term () != null ) {
1882
- if (iter .docFreq () > 16 && iter .term ().utf8ToString ().length () > freq ) {
1883
- LOGGER .warning (iter .term ().utf8ToString ());
1884
- }
1885
- BytesRef next = iter .next ();
1886
- if (next == null ) {
1887
- iter = null ;
1888
- }
1889
- }
1890
- } finally {
1891
-
1892
- if (ireader != null ) {
1893
- try {
1894
- ireader .close ();
1895
- } catch (IOException e ) {
1896
- LOGGER .log (Level .WARNING , "An error occurred while closing index reader" , e );
1897
- }
1898
- }
1899
- }
1900
- }
1901
-
1902
1851
/**
1903
1852
* Get an indexReader for the Index database where a given file.
1904
1853
*
0 commit comments