Skip to content

Commit 6bb48a9

Browse files
authored
fix annotation regression (#4516)
Use the Document to get the file type when checking whether a file is eligible for annotation. The document is used as a negative annotation capability check; if there is no document, fall back to the repository check. Fixes #4515.
1 parent ba9e18d commit 6bb48a9

File tree

5 files changed

+161
-54
lines changed

5 files changed

+161
-54
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/AnalyzerGuru.java

+14-4
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
2222
* Portions Copyright (c) 2017, 2021, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opengrok.indexer.analysis;
@@ -624,9 +624,9 @@ public void populateDocument(Document doc, File file, String path, AbstractAnaly
624624
}
625625

626626
if (fa != null) {
627-
AbstractAnalyzer.Genre g = fa.getGenre();
628-
if (g == AbstractAnalyzer.Genre.PLAIN || g == AbstractAnalyzer.Genre.XREFABLE || g == AbstractAnalyzer.Genre.HTML) {
629-
doc.add(new Field(QueryBuilder.T, g.typeName(), string_ft_stored_nanalyzed_norms));
627+
AbstractAnalyzer.Genre genre = fa.getGenre();
628+
if (isXrefable(genre.typeName())) {
629+
doc.add(new Field(QueryBuilder.T, genre.typeName(), string_ft_stored_nanalyzed_norms));
630630
}
631631
fa.analyze(doc, StreamSource.fromFile(file), xrefOut);
632632

@@ -635,6 +635,16 @@ public void populateDocument(Document doc, File file, String path, AbstractAnaly
635635
}
636636
}
637637

638+
/**
639+
* @param genreName genre name
640+
* @return whether it is possible to produce xref for the genre
641+
*/
642+
public static boolean isXrefable(String genreName) {
643+
return (genreName.equals(AbstractAnalyzer.Genre.PLAIN.typeName())
644+
|| genreName.equals(AbstractAnalyzer.Genre.XREFABLE.typeName())
645+
|| genreName.equals(AbstractAnalyzer.Genre.HTML.typeName()));
646+
}
647+
638648
private static void populateDocumentHistory(Document doc, File file) {
639649
try {
640650
HistoryGuru histGuru = HistoryGuru.getInstance();

opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java

+64-30
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
2222
* Portions Copyright (c) 2017, 2020, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opengrok.indexer.history;
@@ -47,10 +47,10 @@
4747
import java.util.logging.Logger;
4848
import java.util.stream.Collectors;
4949

50+
import org.apache.lucene.document.Document;
51+
import org.apache.lucene.queryparser.classic.ParseException;
5052
import org.jetbrains.annotations.Nullable;
5153
import org.jetbrains.annotations.VisibleForTesting;
52-
import org.opengrok.indexer.analysis.AbstractAnalyzer;
53-
import org.opengrok.indexer.analysis.AnalyzerGuru;
5454
import org.opengrok.indexer.configuration.CommandTimeoutType;
5555
import org.opengrok.indexer.configuration.Configuration;
5656
import org.opengrok.indexer.configuration.Configuration.RemoteSCM;
@@ -59,11 +59,15 @@
5959
import org.opengrok.indexer.configuration.RuntimeEnvironment;
6060
import org.opengrok.indexer.logger.LoggerFactory;
6161
import org.opengrok.indexer.search.DirectoryEntry;
62+
import org.opengrok.indexer.search.QueryBuilder;
6263
import org.opengrok.indexer.util.ForbiddenSymlinkException;
6364
import org.opengrok.indexer.util.PathUtils;
6465
import org.opengrok.indexer.util.Progress;
6566
import org.opengrok.indexer.util.Statistics;
6667

68+
import static org.opengrok.indexer.analysis.AnalyzerGuru.isXrefable;
69+
import static org.opengrok.indexer.index.IndexDatabase.getDocument;
70+
6771
/**
6872
* The HistoryGuru is used to implement a transparent layer to the various
6973
* source control systems.
@@ -229,10 +233,14 @@ public String getAnnotationCacheInfo() {
229233
*/
230234
@Nullable
231235
private Annotation getAnnotation(File file, @Nullable String rev, boolean fallback) throws IOException {
232-
Annotation annotation;
233-
234236
Repository repository = getRepository(file);
235-
if (annotationCache != null && repository != null && repository.isAnnotationCacheEnabled()) {
237+
if (repository == null) {
238+
LOGGER.log(Level.FINER, "no repository found for ''{0}'' to check for annotation", file);
239+
return null;
240+
}
241+
242+
Annotation annotation;
243+
if (annotationCache != null && repository.isAnnotationCacheEnabled()) {
236244
try {
237245
annotation = annotationCache.get(file, rev);
238246
if (annotation != null) {
@@ -248,9 +256,13 @@ private Annotation getAnnotation(File file, @Nullable String rev, boolean fallba
248256
return null;
249257
}
250258

259+
if (!HistoryGuru.getInstance().hasAnnotation(file)) {
260+
LOGGER.log(Level.FINER, "skipped getting annotation for file ''{0}''", file);
261+
return null;
262+
}
263+
251264
// Fall back to repository based annotation.
252-
// It might be possible to store the annotation to the annotation cache here, needs further thought.
253-
annotation = getAnnotationFromRepository(file, rev);
265+
annotation = getAnnotationFromRepository(file, rev, repository);
254266
if (annotation != null) {
255267
annotation.setRevision(LatestRevisionUtil.getLatestRevision(file));
256268
}
@@ -260,24 +272,21 @@ private Annotation getAnnotation(File file, @Nullable String rev, boolean fallba
260272

261273
/**
262274
* Annotate given file using repository method. Makes sure that the resulting annotation has the revision set.
275+
* Assumes the {@link HistoryGuru#hasAnnotation(File)} check was already done.
263276
* @param file file object to generate the annotation for
264277
* @param rev revision to get the annotation for or {@code null} for latest revision of given file
278+
* @param repository {@link Repository} instance
265279
* @return annotation object or {@code null}
266280
* @throws IOException on error when getting the annotation
267281
*/
268282
@Nullable
269-
private Annotation getAnnotationFromRepository(File file, @Nullable String rev) throws IOException {
283+
private Annotation getAnnotationFromRepository(File file, @Nullable String rev, Repository repository) throws IOException {
270284
if (!env.getPathAccepter().accept(file)) {
271285
LOGGER.log(Level.FINEST, "file ''{0}'' not accepted for annotation", file);
272286
return null;
273287
}
274288

275-
Repository repository = getRepository(file);
276-
if (repository != null && hasAnnotation(file)) {
277-
return repository.annotate(file, rev);
278-
}
279-
280-
return null;
289+
return repository.annotate(file, rev);
281290
}
282291

283292
/**
@@ -681,35 +690,60 @@ public boolean hasHistoryCacheForFile(File file) {
681690
}
682691

683692
/**
684-
* Check if we can annotate the specified file.
685-
*
693+
* Check if annotation can be produced for the specified file. If related document is specified,
694+
* it will be used for negative check. If the document indicates that the type of file is xref-able
695+
* or the document is {@code null}, the capability to produce annotation for the file will be checked
696+
* in related repository.
686697
* @param file the file to check
687-
* @return whether the file is under version control, can be annotated and the
688-
* version control system supports annotation
698+
* @param document {@link Document} object related to the file, can be {@code null}.
699+
* @return whether the file can be annotated
689700
*/
690-
public boolean hasAnnotation(File file) {
701+
public boolean hasAnnotation(File file, @Nullable Document document) {
691702
if (file.isDirectory()) {
692-
LOGGER.log(Level.FINEST, "no annotations for directories (''{0}'') to check annotation presence",
693-
file);
703+
LOGGER.log(Level.FINEST, "no annotations for directories (''{0}'')", file);
694704
return false;
695705
}
696706

697-
AbstractAnalyzer.Genre genre = AnalyzerGuru.getGenre(file.toString());
698-
if (genre == null) {
699-
LOGGER.log(Level.INFO, "will not produce annotation for ''{0}'' with unknown genre", file);
700-
return false;
707+
if (document != null) {
708+
// The "T" field is added to the document currently only for xref-able input data,
709+
// however it does not hurt to check in case this will change.
710+
String fileType = document.get(QueryBuilder.T);
711+
if (fileType == null || !isXrefable(fileType)) {
712+
LOGGER.log(Level.FINEST, "no file type found in document for ''{0}'' or not xref-able", file);
713+
return false;
714+
}
701715
}
702-
if (genre.equals(AbstractAnalyzer.Genre.DATA) || genre.equals(AbstractAnalyzer.Genre.IMAGE)) {
703-
LOGGER.log(Level.INFO, "no sense to produce annotation for binary file ''{0}''", file);
716+
717+
return hasAnnotationInRepo(file);
718+
}
719+
720+
/**
721+
* Check if annotation can be produced for the specified file. Wrapper of {@link #hasAnnotation(File, Document)}
* that first tries to get the document for the file from the index.
722+
* @param file the file to check
723+
* @return whether the file can be annotated
724+
*/
725+
public boolean hasAnnotation(File file) {
726+
if (file.isDirectory()) {
727+
LOGGER.log(Level.FINEST, "no annotations for directories (''{0}'')", file);
704728
return false;
705729
}
706730

731+
Document document = null;
732+
try {
733+
document = getDocument(file);
734+
} catch (ParseException | IOException e) {
735+
LOGGER.log(Level.FINEST, String.format("cannot get document for '%s' to check annotation", file), e);
736+
}
737+
738+
return hasAnnotation(file, document);
739+
}
740+
741+
private boolean hasAnnotationInRepo(File file) {
707742
Repository repo = getRepository(file);
708743
if (repo == null) {
709744
LOGGER.log(Level.FINEST, "cannot find repository for ''{0}'' to check annotation presence", file);
710745
return false;
711746
}
712-
713747
if (!repo.isWorking()) {
714748
LOGGER.log(Level.FINEST, "repository {0} for ''{1}'' is not working to check annotation presence",
715749
new Object[]{repo, file});
@@ -1116,7 +1150,7 @@ public void createAnnotationCache(File file, String latestRev) throws CacheExcep
11161150
LOGGER.log(Level.FINEST, "creating annotation cache for ''{0}''", file);
11171151
try {
11181152
Statistics statistics = new Statistics();
1119-
Annotation annotation = getAnnotationFromRepository(file, null);
1153+
Annotation annotation = getAnnotationFromRepository(file, null, repository);
11201154
statistics.report(LOGGER, Level.FINEST, String.format("retrieved annotation for ''%s''", file),
11211155
"annotation.retrieve.latency");
11221156

opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java

+15-6
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2008, 2023, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2008, 2024, Oracle and/or its affiliates. All rights reserved.
2222
* Portions Copyright (c) 2017, 2020, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opengrok.indexer.index;
@@ -1229,23 +1229,32 @@ private void addFile(File file, String path, Ctags ctags) throws IOException, In
12291229

12301230
setDirty();
12311231

1232+
createAnnotationCache(file, doc);
1233+
1234+
for (IndexChangedListener listener : listeners) {
1235+
listener.fileAdded(path, fa.getClass().getSimpleName());
1236+
}
1237+
}
1238+
1239+
private static void createAnnotationCache(File file, Document doc) {
1240+
if (!HistoryGuru.getInstance().hasAnnotation(file, doc)) {
1241+
LOGGER.log(Level.FINER, "skipped creating annotation cache for file ''{0}''", file);
1242+
return;
1243+
}
1244+
12321245
String lastRev = doc.get(QueryBuilder.LASTREV);
12331246
if (lastRev != null) {
12341247
try {
12351248
// The last revision should be fresh. Using LatestRevisionUtil#getLatestRevision()
12361249
// would not work here, because it uses IndexDatabase#getDocument() and the index searcher used therein
1237-
// does not know about updated document yet, so stale revision would be returned.
1250+
// does not know about the updated document yet, so stale revision would be returned.
12381251
// Instead, use the last revision (retrieved from the history in the populateDocument()
12391252
// call above) directly.
12401253
HistoryGuru.getInstance().createAnnotationCache(file, lastRev);
12411254
} catch (CacheException e) {
12421255
LOGGER.log(e.getLevel(), "failed to create annotation", e);
12431256
}
12441257
}
1245-
1246-
for (IndexChangedListener listener : listeners) {
1247-
listener.fileAdded(path, fa.getClass().getSimpleName());
1248-
}
12491258
}
12501259

12511260
private AbstractAnalyzer getAnalyzerFor(File file, String path)

opengrok-indexer/src/test/java/org/opengrok/indexer/history/HistoryGuruTest.java

+64-8
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2008, 2023, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2008, 2024, Oracle and/or its affiliates. All rights reserved.
2222
* Portions Copyright (c) 2019, 2020, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opengrok.indexer.history;
2525

26+
import static org.junit.jupiter.api.Assertions.assertAll;
2627
import static org.junit.jupiter.api.Assertions.assertEquals;
2728
import static org.junit.jupiter.api.Assertions.assertFalse;
2829
import static org.junit.jupiter.api.Assertions.assertNotEquals;
@@ -46,15 +47,24 @@
4647
import java.util.List;
4748
import java.util.stream.Collectors;
4849

50+
import org.apache.lucene.document.Document;
51+
import org.apache.lucene.document.Field;
52+
import org.apache.lucene.document.FieldType;
53+
import org.apache.lucene.document.StringField;
54+
import org.apache.lucene.index.IndexNotFoundException;
4955
import org.junit.jupiter.api.AfterAll;
5056
import org.junit.jupiter.api.AfterEach;
5157
import org.junit.jupiter.api.BeforeAll;
5258
import org.junit.jupiter.api.Test;
5359
import org.junit.jupiter.params.ParameterizedTest;
5460
import org.junit.jupiter.params.provider.ValueSource;
61+
import org.opengrok.indexer.analysis.AbstractAnalyzer;
62+
import org.opengrok.indexer.analysis.AnalyzerGuru;
5563
import org.opengrok.indexer.condition.EnabledForRepository;
5664
import org.opengrok.indexer.configuration.RuntimeEnvironment;
65+
import org.opengrok.indexer.index.IndexDatabase;
5766
import org.opengrok.indexer.search.DirectoryEntry;
67+
import org.opengrok.indexer.search.QueryBuilder;
5868
import org.opengrok.indexer.util.FileUtilities;
5969
import org.opengrok.indexer.util.TestRepository;
6070

@@ -71,6 +81,8 @@ class HistoryGuruTest {
7181

7282
private static int savedNestingMaximum;
7383

84+
HistoryGuru instance = HistoryGuru.getInstance();
85+
7486
@BeforeAll
7587
static void setUpClass() throws Exception {
7688
env = RuntimeEnvironment.getInstance();
@@ -139,14 +151,57 @@ void testBug16465() throws HistoryException, IOException {
139151
}
140152
}
141153

142-
@Test
143-
void testAnnotationSmokeTest() throws Exception {
144-
HistoryGuru instance = HistoryGuru.getInstance();
145-
for (File f : FILES) {
146-
if (instance.hasAnnotation(f)) {
147-
assertNotNull(instance.annotate(f, null));
148-
}
154+
@ParameterizedTest
155+
@ValueSource(booleans = {true, false})
156+
void testHasAnnotationWithDocument(boolean isXrefable) {
157+
File file = Paths.get(repository.getSourceRoot(), "git", "main.c").toFile();
158+
assertTrue(file.isFile());
159+
Document document = new Document();
160+
String typeName;
161+
if (isXrefable) {
162+
typeName = AbstractAnalyzer.Genre.PLAIN.typeName();
163+
} else {
164+
typeName = AbstractAnalyzer.Genre.DATA.typeName();
149165
}
166+
assertEquals(isXrefable, AnalyzerGuru.isXrefable(typeName));
167+
document.add(new Field(QueryBuilder.T, typeName, new FieldType(StringField.TYPE_STORED)));
168+
169+
/*
170+
* This test class does not perform the 2nd phase of indexing, therefore getDocument() for any file
171+
* should result in exception. This differentiates the two hasAnnotation() implementations.
172+
*/
173+
assertThrows(IndexNotFoundException.class, () -> IndexDatabase.getDocument(file));
174+
assertTrue(instance.hasAnnotation(file));
175+
assertEquals(isXrefable, instance.hasAnnotation(file, document));
176+
}
177+
178+
/**
179+
* Check that {@link HistoryGuru#hasAnnotation(File, Document)} falls back to repository check
180+
* if the document is {@code null}. Complements the {@link #testHasAnnotationWithDocument(boolean)} test.
181+
*/
182+
@Test
183+
void testHasAnnotationWithoutDocument() {
184+
File file = Paths.get(repository.getSourceRoot(), "git", "main.c").toFile();
185+
assertTrue(file.isFile());
186+
Repository repositoryForGit = HistoryGuru.getInstance().getRepository(file);
187+
assertNotNull(repositoryForGit);
188+
assertTrue(repositoryForGit.isWorking());
189+
var repoHasAnnotation = repositoryForGit.fileHasAnnotation(file);
190+
assertAll(() -> assertEquals(repoHasAnnotation, instance.hasAnnotation(file, null)),
191+
() -> assertEquals(repoHasAnnotation, instance.hasAnnotation(file)));
192+
}
193+
194+
/**
195+
* Simple smoke test for hasAnnotation(). Because of the way {@link HistoryGuru#hasAnnotation(File)}
196+
* works and given this test class does not perform 2nd phase of the indexing, there will be some binary
197+
* files in the list. These should not be included if the 2nd indexing phase was done as the
198+
* {@link HistoryGuru#hasAnnotation(File)} should use the index document for negative check.
199+
*/
200+
@Test
201+
void testHasAnnotationSmokeTest() {
202+
// If hasAnnotation() returns true for a file, it should be possible to actually construct the annotation.
203+
List<File> filesWithAnnotation = FILES.stream().filter(instance::hasAnnotation).collect(Collectors.toList());
204+
assertAll(filesWithAnnotation.stream().map(f -> () -> assertNotNull(instance.annotate(f, null))));
150205
}
151206

152207
/**
@@ -184,6 +239,7 @@ void testAnnotationFallback() throws Exception {
184239
assertNotNull(latestRev);
185240
instance.createAnnotationCache(file, latestRev);
186241
Repository repository = instance.getRepository(file);
242+
assertNotNull(repository);
187243

188244
// Ensure the annotation is loaded from the cache by moving the dot git directory away.
189245
final String tmpDirName = "gitdisabled";

0 commit comments

Comments
 (0)