@@ -160,12 +160,12 @@ public class IndexDatabase {
160160 private CopyOnWriteArrayList <IndexChangedListener > listeners ;
161161 private File dirtyFile ;
162162 private final Object lock = new Object ();
163- private boolean dirty ;
163+ private boolean dirty ; // Whether the index was modified either by adding or removing a document.
164164 private boolean running ;
165165 private boolean isCountingDeltas ;
166166 private boolean isWithDirectoryCounts ;
167167 private List <String > directories ;
168- private LockFactory lockfact ;
168+ private LockFactory lockFactory ;
169169 private final BytesRef emptyBR = new BytesRef ("" );
170170
171171 // Directory where we store indexes
@@ -195,7 +195,7 @@ public IndexDatabase() throws IOException {
195195 public IndexDatabase (Project project , IndexDownArgsFactory factory ) throws IOException {
196196 indexDownArgsFactory = factory ;
197197 this .project = project ;
198- lockfact = NoLockFactory .INSTANCE ;
198+ lockFactory = NoLockFactory .INSTANCE ;
199199 initialize ();
200200 }
201201
@@ -329,15 +329,20 @@ private void initialize() throws IOException {
329329 }
330330 }
331331
332- lockfact = pickLockFactory (env );
333- indexDirectory = FSDirectory .open (indexDir .toPath (), lockfact );
332+ lockFactory = pickLockFactory (env );
333+ indexDirectory = FSDirectory .open (indexDir .toPath (), lockFactory );
334334 pathAccepter = env .getPathAccepter ();
335335 analyzerGuru = new AnalyzerGuru ();
336336 xrefDir = new File (env .getDataRootFile (), XREF_DIR );
337337 listeners = new CopyOnWriteArrayList <>();
338338 dirtyFile = new File (indexDir , "dirty" );
339339 dirty = dirtyFile .exists ();
340340 directories = new ArrayList <>();
341+
342+ if (dirty ) {
343+ LOGGER .log (Level .WARNING , "Index in ''{0}'' is dirty, the last indexing was likely interrupted." +
344+ " It might be worthwhile to reindex from scratch." , indexDir );
345+ }
341346 }
342347 }
343348
@@ -632,7 +637,10 @@ public void update() throws IOException {
632637 try {
633638 if (terms != null ) {
634639 uidIter = terms .iterator ();
635- TermsEnum .SeekStatus stat = uidIter .seekCeil (new BytesRef (startUid )); //init uid
640+ // The seekCeil() is pretty important because it makes uidIter .term () become non-null.
641+ // Various indexer methods rely on this when working with the uidIter iterator - rather
642+ // than calling uidIter .next () first thing, they check uidIter .term ().
643+ TermsEnum .SeekStatus stat = uidIter .seekCeil (new BytesRef (startUid ));
636644 if (stat == TermsEnum .SeekStatus .END ) {
637645 uidIter = null ;
638646 LOGGER .log (Level .WARNING ,
@@ -720,9 +728,7 @@ public void update() throws IOException {
720728 }
721729
722730 if (!isInterrupted () && isDirty ()) {
723- if (env .isOptimizeDatabase ()) {
724- optimize ();
725- }
731+ unsetDirty ();
726732 env .setIndexTimestamp ();
727733 }
728734 }
@@ -808,11 +814,11 @@ void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws IOExcepti
808814 }
809815
810816 /**
811- * Optimize all index databases.
817+ * Reduce segment counts of all index databases.
812818 *
813819 * @throws IOException if an error occurs
814820 */
815- static CountDownLatch optimizeAll () throws IOException {
821+ static void reduceSegmentCountAll () throws IOException {
816822 List <IndexDatabase > dbs = new ArrayList <>();
817823 RuntimeEnvironment env = RuntimeEnvironment .getInstance ();
818824 IndexerParallelizer parallelizer = env .getIndexerParallelizer ();
@@ -827,30 +833,35 @@ static CountDownLatch optimizeAll() throws IOException {
827833 CountDownLatch latch = new CountDownLatch (dbs .size ());
828834 for (IndexDatabase d : dbs ) {
829835 final IndexDatabase db = d ;
830- if (db .isDirty ()) {
831- parallelizer .getFixedExecutor ().submit (() -> {
832- try {
833- db .update ();
834- } catch (Throwable e ) {
835- LOGGER .log (Level .SEVERE ,
836- "Problem updating lucene index database: " , e );
837- } finally {
838- latch .countDown ();
839- }
840- });
841- }
836+ parallelizer .getFixedExecutor ().submit (() -> {
837+ try {
838+ db .reduceSegmentCount ();
839+ } catch (Throwable e ) {
840+ LOGGER .log (Level .SEVERE ,
841+ "Problem reducing segment count of Lucene index database: " , e );
842+ } finally {
843+ latch .countDown ();
844+ }
845+ });
846+ }
847+
848+ try {
849+ LOGGER .info ("Waiting for the Lucene segment count reduction to finish" );
850+ latch .await ();
851+ } catch (InterruptedException exp ) {
852+ LOGGER .log (Level .WARNING , "Received interrupt while waiting" +
853+ " for index segment count reduction to finish" , exp );
842854 }
843- return latch ;
844855 }
845856
846857 /**
847- * Optimize the index database.
858+ * Reduce number of segments in the index database.
848859 * @throws IOException I/O exception
849860 */
850- public void optimize () throws IOException {
861+ public void reduceSegmentCount () throws IOException {
851862 synchronized (lock ) {
852863 if (running ) {
853- LOGGER .warning ("Optimize terminated... Someone else is updating / optimizing it !" );
864+ LOGGER .warning ("Segment count reduction terminated... Someone else is running the operation !" );
854865 return ;
855866 }
856867 running = true ;
@@ -861,25 +872,18 @@ public void optimize() throws IOException {
861872 try {
862873 Statistics elapsed = new Statistics ();
863874 String projectDetail = this .project != null ? " for project " + project .getName () : "" ;
864- LOGGER .log (Level .INFO , "Optimizing the index{0}" , projectDetail );
875+ LOGGER .log (Level .INFO , "Reducing number of segments in the index{0}" , projectDetail );
865876 Analyzer analyzer = new StandardAnalyzer ();
866877 IndexWriterConfig conf = new IndexWriterConfig (analyzer );
867878 conf .setOpenMode (OpenMode .CREATE_OR_APPEND );
868879
869880 wrt = new IndexWriter (indexDirectory , conf );
870- wrt .forceMerge (1 ); // this is deprecated and not needed anymore
871- elapsed .report (LOGGER , String .format ("Done optimizing index%s" , projectDetail ),
872- "indexer.db.optimize" );
873- synchronized (lock ) {
874- if (dirtyFile .exists () && !dirtyFile .delete ()) {
875- LOGGER .log (Level .FINE , "Failed to remove \" dirty-file\" : {0}" ,
876- dirtyFile .getAbsolutePath ());
877- }
878- dirty = false ;
879- }
881+ wrt .forceMerge (1 );
882+ elapsed .report (LOGGER , String .format ("Done reducing number of segments in index%s" , projectDetail ),
883+ "indexer.db.reduceSegments" );
880884 } catch (IOException e ) {
881885 writerException = e ;
882- LOGGER .log (Level .SEVERE , "ERROR: optimizing index" , e );
886+ LOGGER .log (Level .SEVERE , "ERROR: reducing number of segments in index" , e );
883887 } finally {
884888 if (wrt != null ) {
885889 try {
@@ -925,6 +929,15 @@ private void setDirty() {
925929 }
926930 }
927931
/**
 * Clear the dirty state of this index database: best-effort removal of the on-disk
 * "dirty" marker file, then reset the in-memory {@code dirty} flag. Both happen under
 * {@code lock} so the flag and the marker file stay in sync with concurrent setDirty().
 * A failed delete is only logged at FINE because the in-memory flag is still cleared.
 */
932+ private void unsetDirty () {
933+ synchronized (lock ) {
934+ if (dirtyFile .exists () && !dirtyFile .delete ()) {
935+ LOGGER .log (Level .FINE , "Failed to remove \" dirty-file\" : {0}" , dirtyFile .getAbsolutePath ());
936+ }
937+ dirty = false ;
938+ }
939+ }
940+
928941 private File whatXrefFile (String path , boolean compress ) {
929942 String xrefPath = compress ? TandemPath .join (path , ".gz" ) : path ;
930943 return new File (xrefDir , xrefPath );
@@ -1688,6 +1701,7 @@ private void indexParallel(String dir, IndexDownArgs args) {
16881701 }
16891702 }))).get ();
16901703 } catch (InterruptedException | ExecutionException e ) {
1704+ interrupted = true ;
16911705 int successCount = successCounter .intValue ();
16921706 double successPct = 100.0 * successCount / worksCount ;
16931707 String exmsg = String .format ("%d successes (%.1f%%) after aborting parallel-indexing" ,
@@ -1793,17 +1807,11 @@ public Set<String> getFiles() throws IOException {
17931807 terms = MultiTerms .getTerms (ireader , QueryBuilder .U );
17941808 iter = terms .iterator (); // init uid iterator
17951809 }
1796- while (iter != null && iter .term () != null ) {
1797- String value = iter .term ().utf8ToString ();
1798- if (value .isEmpty ()) {
1799- iter .next ();
1800- continue ;
1801- }
1802-
1803- files .add (Util .uid2url (value ));
1804- BytesRef next = iter .next ();
1805- if (next == null ) {
1806- iter = null ;
1810+ BytesRef term ;
1811+ while (iter != null && (term = iter .next ()) != null ) {
1812+ String value = term .utf8ToString ();
1813+ if (!value .isEmpty ()) {
1814+ files .add (Util .uid2url (value ));
18071815 }
18081816 }
18091817 } finally {
@@ -1840,65 +1848,6 @@ public int getNumFiles() throws IOException {
18401848 }
18411849 }
18421850
1843- static void listFrequentTokens (List <String > subFiles ) throws IOException {
1844- final int limit = 4 ;
1845-
1846- RuntimeEnvironment env = RuntimeEnvironment .getInstance ();
1847- if (env .hasProjects ()) {
1848- if (subFiles == null || subFiles .isEmpty ()) {
1849- for (Project project : env .getProjectList ()) {
1850- IndexDatabase db = new IndexDatabase (project );
1851- db .listTokens (limit );
1852- }
1853- } else {
1854- for (String path : subFiles ) {
1855- Project project = Project .getProject (path );
1856- if (project == null ) {
1857- LOGGER .log (Level .WARNING , "Could not find a project for \" {0}\" " , path );
1858- } else {
1859- IndexDatabase db = new IndexDatabase (project );
1860- db .listTokens (limit );
1861- }
1862- }
1863- }
1864- } else {
1865- IndexDatabase db = new IndexDatabase ();
1866- db .listTokens (limit );
1867- }
1868- }
1869-
1870- public void listTokens (int freq ) throws IOException {
1871- IndexReader ireader = null ;
1872- TermsEnum iter = null ;
1873- Terms terms ;
1874-
1875- try {
1876- ireader = DirectoryReader .open (indexDirectory );
1877- if (ireader .numDocs () > 0 ) {
1878- terms = MultiTerms .getTerms (ireader , QueryBuilder .DEFS );
1879- iter = terms .iterator (); // init uid iterator
1880- }
1881- while (iter != null && iter .term () != null ) {
1882- if (iter .docFreq () > 16 && iter .term ().utf8ToString ().length () > freq ) {
1883- LOGGER .warning (iter .term ().utf8ToString ());
1884- }
1885- BytesRef next = iter .next ();
1886- if (next == null ) {
1887- iter = null ;
1888- }
1889- }
1890- } finally {
1891-
1892- if (ireader != null ) {
1893- try {
1894- ireader .close ();
1895- } catch (IOException e ) {
1896- LOGGER .log (Level .WARNING , "An error occurred while closing index reader" , e );
1897- }
1898- }
1899- }
1900- }
1901-
19021851 /**
19031852 * Get an indexReader for the Index database where a given file.
19041853 *
0 commit comments