Skip to content

Commit bbaef9d

Browse files
authored
handle symlinks for history based reindex (#4532)
fixes #4528
1 parent b4a1a3e commit bbaef9d

File tree

3 files changed

+108
-47
lines changed

3 files changed

+108
-47
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java

+61-42
Original file line numberDiff line numberDiff line change
@@ -800,7 +800,7 @@ public void update() throws IOException {
800800

801801
/**
802802
* The traversal of the uid terms done in {@link #processFile(IndexDownArgs, File, String)}
803-
* and {@link #processFileIncremental(IndexDownArgs, File, String)} needs to skip over deleted documents
803+
* and {@link #processFileHistoryBased(IndexDownArgs, File, String)} needs to skip over deleted documents
804804
* that are often found in multi-segment indexes. This method stores the uids of these documents
805805
* and is expected to be called before the traversal for the top level directory is started.
806806
* @throws IOException if the index cannot be read for some reason
@@ -932,8 +932,11 @@ void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws IOExcepti
932932
try (Progress progress = new Progress(LOGGER, String.format("collecting files for %s", project),
933933
fileCollector.getFiles().size())) {
934934
for (String path : fileCollector.getFiles()) {
935+
if (isInterrupted()) {
936+
return;
937+
}
935938
File file = new File(sourceRoot, path);
936-
processFileIncremental(args, file, path);
939+
processFileHistoryBased(args, file, path);
937940
progress.increment();
938941
}
939942
}
@@ -1288,6 +1291,7 @@ private static void cleanupResources(Document doc) {
12881291

12891292
/**
12901293
* Check if I should accept this file into the index database.
1294+
* Directories are automatically accepted.
12911295
*
12921296
* @param file the file to check
12931297
* @param ret defined instance whose {@code localRelPath} property will be
@@ -1306,53 +1310,49 @@ private boolean accept(File file, AcceptSymlinkRet ret) {
13061310
}
13071311

13081312
if (!file.canRead()) {
1309-
LOGGER.log(Level.WARNING, "Could not read {0}", absolutePath);
1313+
LOGGER.log(Level.WARNING, "Could not read ''{0}''", absolutePath);
13101314
return false;
13111315
}
13121316

13131317
try {
13141318
Path absolute = Paths.get(absolutePath);
13151319
if (Files.isSymbolicLink(absolute)) {
13161320
File canonical = file.getCanonicalFile();
1317-
if (!absolutePath.equals(canonical.getPath()) &&
1318-
!acceptSymlink(absolute, canonical, ret)) {
1321+
if (!absolutePath.equals(canonical.getPath()) && !acceptSymlink(absolute, canonical, ret)) {
13191322
if (ret.localRelPath == null) {
13201323
LOGGER.log(Level.FINE, "Skipped symlink ''{0}'' -> ''{1}''",
13211324
new Object[] {absolutePath, canonical});
13221325
}
13231326
return false;
13241327
}
13251328
}
1326-
//below will only let go files and directories, anything else is considered special and is not added
1329+
// Below will only let go files and directories, anything else is considered special and is not added.
13271330
if (!file.isFile() && !file.isDirectory()) {
1328-
LOGGER.log(Level.WARNING, "Ignored special file {0}",
1329-
absolutePath);
1331+
LOGGER.log(Level.WARNING, "Ignored special file ''{0}''", absolutePath);
13301332
return false;
13311333
}
13321334
} catch (IOException exp) {
1333-
LOGGER.log(Level.WARNING, "Failed to resolve name: {0}",
1334-
absolutePath);
1335+
LOGGER.log(Level.WARNING, "Failed to resolve name: ''{0}''", absolutePath);
13351336
LOGGER.log(Level.FINE, "Stack Trace: ", exp);
13361337
}
13371338

13381339
if (file.isDirectory()) {
1339-
// always accept directories so that their files can be examined
1340+
// Always accept directories so that their files can be examined.
13401341
return true;
13411342
}
13421343

1343-
13441344
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
13451345
// Lookup history if indexing versioned files only.
13461346
// Skip the lookup entirely (which is expensive) if unversioned files are allowed
13471347
if (env.isIndexVersionedFilesOnly()) {
13481348
if (HistoryGuru.getInstance().hasHistory(file)) {
1349-
// versioned files should always be accepted
1349+
// Versioned files should always be accepted.
13501350
return true;
13511351
}
13521352
LOGGER.log(Level.FINER, "not accepting unversioned {0}", absolutePath);
13531353
return false;
13541354
}
1355-
// unversioned files are allowed
1355+
// Unversioned files are allowed.
13561356
return true;
13571357
}
13581358

@@ -1374,7 +1374,7 @@ private boolean accept(File parent, File file, AcceptSymlinkRet ret) {
13741374
File f1 = parent.getCanonicalFile();
13751375
File f2 = file.getCanonicalFile();
13761376
if (f1.equals(f2)) {
1377-
LOGGER.log(Level.INFO, "Skipping links to itself...: {0} {1}",
1377+
LOGGER.log(Level.INFO, "Skipping links to itself...: ''{0}'' ''{1}''",
13781378
new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
13791379
return false;
13801380
}
@@ -1383,15 +1383,15 @@ private boolean accept(File parent, File file, AcceptSymlinkRet ret) {
13831383
File t1 = f1;
13841384
while ((t1 = t1.getParentFile()) != null) {
13851385
if (f2.equals(t1)) {
1386-
LOGGER.log(Level.INFO, "Skipping links to parent...: {0} {1}",
1386+
LOGGER.log(Level.INFO, "Skipping links to parent...: ''{0}'' ''{1}''",
13871387
new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
13881388
return false;
13891389
}
13901390
}
13911391

13921392
return accept(file, ret);
13931393
} catch (IOException ex) {
1394-
LOGGER.log(Level.WARNING, "Failed to resolve name: {0} {1}",
1394+
LOGGER.log(Level.WARNING, "Failed to resolve name: ''{0}'' ''{1}''",
13951395
new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
13961396
}
13971397
return false;
@@ -1423,7 +1423,7 @@ private boolean acceptSymlink(Path absolute, File canonical, AcceptSymlinkRet re
14231423
if (isLocal(canonical1)) {
14241424
if (!isCanonicalDir) {
14251425
if (LOGGER.isLoggable(Level.FINEST)) {
1426-
LOGGER.log(Level.FINEST, "Local {0} has symlink from {1}",
1426+
LOGGER.log(Level.FINEST, "Local ''{0}'' has symlink from ''{1}''",
14271427
new Object[] {canonical1, absolute1});
14281428
}
14291429
/*
@@ -1452,7 +1452,7 @@ private boolean acceptSymlink(Path absolute, File canonical, AcceptSymlinkRet re
14521452
* the file already -- but we are forced to handle.
14531453
*/
14541454
LOGGER.log(Level.WARNING, String.format(
1455-
"Unexpected error getting relative for %s", canonical), e);
1455+
"Unexpected error getting relative for '%s'", canonical), e);
14561456
absolute0 = absolute1;
14571457
}
14581458
indexed1 = new IndexedSymlink(absolute0, canonical1, true);
@@ -1477,7 +1477,7 @@ private boolean acceptSymlink(Path absolute, File canonical, AcceptSymlinkRet re
14771477
Paths.get(indexed0.getAbsolute())).toString();
14781478

14791479
if (LOGGER.isLoggable(Level.FINEST)) {
1480-
LOGGER.log(Level.FINEST, "External dir {0} has symlink from {1} after first {2}",
1480+
LOGGER.log(Level.FINEST, "External dir ''{0}'' has symlink from ''{1}'' after first ''{2}''",
14811481
new Object[] {canonical1, absolute1, indexed0.getAbsolute()});
14821482
}
14831483
return false;
@@ -1495,7 +1495,7 @@ private boolean acceptSymlink(Path absolute, File canonical, AcceptSymlinkRet re
14951495
if (!isCanonicalDir) {
14961496
if (LOGGER.isLoggable(Level.FINEST)) {
14971497
LOGGER.log(Level.FINEST,
1498-
"External file {0} has symlink from {1} under previous {2}",
1498+
"External file ''{0}'' has symlink from ''{1}'' under previous ''{2}''",
14991499
new Object[] {canonical1, absolute1, absolute0});
15001500
}
15011501
// Do not add to indexedSymlinks for a non-directory.
@@ -1512,7 +1512,7 @@ private boolean acceptSymlink(Path absolute, File canonical, AcceptSymlinkRet re
15121512

15131513
if (LOGGER.isLoggable(Level.FINEST)) {
15141514
LOGGER.log(Level.FINEST,
1515-
"External dir {0} has symlink from {1} under previous {2}",
1515+
"External dir ''{0}'' has symlink from ''{1}'' under previous ''{2}''",
15161516
new Object[] {canonical1, absolute1, absolute0});
15171517
}
15181518
return false;
@@ -1523,7 +1523,7 @@ private boolean acceptSymlink(Path absolute, File canonical, AcceptSymlinkRet re
15231523
for (String canonicalRoot : canonicalRoots) {
15241524
if (canonical1.startsWith(canonicalRoot)) {
15251525
if (LOGGER.isLoggable(Level.FINEST)) {
1526-
LOGGER.log(Level.FINEST, "Allowed symlink {0} per canonical root {1}",
1526+
LOGGER.log(Level.FINEST, "Allowed symlink ''{0}'' per canonical root ''{1}''",
15271527
new Object[] {absolute1, canonical1});
15281528
}
15291529
if (isCanonicalDir) {
@@ -1540,7 +1540,7 @@ private boolean acceptSymlink(Path absolute, File canonical, AcceptSymlinkRet re
15401540
try {
15411541
allowedTarget = new File(allowedSymlink).getCanonicalPath();
15421542
} catch (IOException e) {
1543-
LOGGER.log(Level.FINE, "unresolvable symlink: {0}", allowedSymlink);
1543+
LOGGER.log(Level.FINE, "unresolvable symlink: ''{0}''", allowedSymlink);
15441544
continue;
15451545
}
15461546
/*
@@ -1610,9 +1610,8 @@ private void handleSymlink(String path, AcceptSymlinkRet ret) {
16101610
* {@link #removeFile(boolean)}. New or updated files are noted for indexing.
16111611
* @param dir the root indexDirectory to generate indexes for
16121612
* @param parent path to parent directory
1613-
* @param args arguments to control execution and for collecting a list of
1613+
* @param args arguments to control execution and for collecting a list of files for indexing
16141614
* @param progress {@link Progress} instance
1615-
* files for indexing
16161615
*/
16171616
@VisibleForTesting
16181617
void indexDown(File dir, String parent, IndexDownArgs args, Progress progress) throws IOException {
@@ -1629,8 +1628,7 @@ void indexDown(File dir, String parent, IndexDownArgs args, Progress progress) t
16291628

16301629
File[] files = dir.listFiles();
16311630
if (files == null) {
1632-
LOGGER.log(Level.SEVERE, "Failed to get file listing for: {0}",
1633-
dir.getPath());
1631+
LOGGER.log(Level.SEVERE, "Failed to get file listing for: ''{0}''", dir.getPath());
16341632
return;
16351633
}
16361634
Arrays.sort(files, FILENAME_COMPARATOR);
@@ -1660,7 +1658,7 @@ void indexDown(File dir, String parent, IndexDownArgs args, Progress progress) t
16601658
* @throws IOException on error
16611659
*/
16621660
@VisibleForTesting
1663-
void processFileIncremental(IndexDownArgs args, File file, String path) throws IOException {
1661+
void processFileHistoryBased(IndexDownArgs args, File file, String path) throws IOException {
16641662
final boolean fileExists = file.exists();
16651663

16661664
path = Util.fixPathIfWindows(path);
@@ -1689,9 +1687,7 @@ void processFileIncremental(IndexDownArgs args, File file, String path) throws I
16891687
checkSettings(termFile, termPath);
16901688
if (!matchOK) {
16911689
removeFile(false);
1692-
1693-
args.curCount++;
1694-
args.works.add(new IndexFileWork(termFile, termPath));
1690+
addWorkHistoryBased(args, termFile, termPath);
16951691
}
16961692
} else {
16971693
removeFile(!fileExists);
@@ -1703,10 +1699,33 @@ void processFileIncremental(IndexDownArgs args, File file, String path) throws I
17031699
}
17041700
}
17051701

1706-
// The function would not be called if the file was not changed in some way.
1707-
if (fileExists) {
1702+
// This function would not be called if the file was not changed in some way (including deletion).
1703+
// That said, it is necessary to check whether the file can be accepted. This is done in the function below.
1704+
// Also, allow for broken symbolic links (File.exists() returns false for these).
1705+
if (fileExists || Files.isSymbolicLink(file.toPath())) {
1706+
addWorkHistoryBased(args, file, path);
1707+
}
1708+
}
1709+
1710+
/**
1711+
* Check if file can be accepted into the index database. If yes, change the {@code args} argument appropriately.
1712+
* @param args {@link IndexDownArgs} instance to which an entry will be added if deemed acceptable
1713+
* @param file file object
1714+
* @param path path of the file relative to given source root (not necessarily global source root)
1715+
*/
1716+
private void addWorkHistoryBased(IndexDownArgs args, File file, String path) {
1717+
AcceptSymlinkRet ret = new AcceptSymlinkRet();
1718+
if (accept(file, ret)) {
1719+
// accept() returns true for directories because it was made to work with indexDown().
1720+
if (file.isDirectory()) {
1721+
LOGGER.log(Level.FINER, "not accepting directory ''{0}'' into the index", file);
1722+
return;
1723+
}
1724+
17081725
args.curCount++;
17091726
args.works.add(new IndexFileWork(file, path));
1727+
} else {
1728+
handleSymlink(file.getParent(), ret);
17101729
}
17111730
}
17121731

@@ -2210,7 +2229,7 @@ boolean checkSettings(File file, String path) throws IOException {
22102229
project.getTabSize() : 0;
22112230
Integer actTabSize = settings.getTabSize();
22122231
if (actTabSize != null && !actTabSize.equals(reqTabSize)) {
2213-
LOGGER.log(Level.FINE, "Tabsize mismatch: {0}", path);
2232+
LOGGER.log(Level.FINE, "Tabsize mismatch: ''{0}''", path);
22142233
return false;
22152234
}
22162235

@@ -2222,7 +2241,7 @@ boolean checkSettings(File file, String path) throws IOException {
22222241
// Read a limited-fields version of the document.
22232242
Document doc = storedFields.document(postsIter.docID(), CHECK_FIELDS);
22242243
if (doc == null) {
2225-
LOGGER.log(Level.FINER, "No Document: {0}", path);
2244+
LOGGER.log(Level.FINER, "No Document for ''{0}''", path);
22262245
continue;
22272246
}
22282247

@@ -2243,7 +2262,7 @@ boolean checkSettings(File file, String path) throws IOException {
22432262
fileTypeName = doc.get(QueryBuilder.TYPE);
22442263
if (fileTypeName == null) {
22452264
// (Should not get here, but break just in case.)
2246-
LOGGER.log(Level.FINEST, "Missing TYPE field: {0}", path);
2265+
LOGGER.log(Level.FINEST, "Missing TYPE field: ''{0}''", path);
22472266
break;
22482267
}
22492268

@@ -2257,14 +2276,14 @@ boolean checkSettings(File file, String path) throws IOException {
22572276
* selection of analyzer or return a value to indicate the
22582277
* analyzer is now mis-matched.
22592278
*/
2260-
LOGGER.log(Level.FINER, "Guru version mismatch: {0}", path);
2279+
LOGGER.log(Level.FINER, "Guru version mismatch: ''{0}''", path);
22612280

22622281
fa = getAnalyzerFor(file, path);
22632282
fileTypeName = fa.getFileTypeName();
22642283
String oldTypeName = doc.get(QueryBuilder.TYPE);
22652284
if (!fileTypeName.equals(oldTypeName)) {
22662285
if (LOGGER.isLoggable(Level.FINE)) {
2267-
LOGGER.log(Level.FINE, "Changed {0} to {1}: {2}",
2286+
LOGGER.log(Level.FINE, "Changed {0} to {1}: ''{2}''",
22682287
new Object[]{oldTypeName, fileTypeName, path});
22692288
}
22702289
return false;
@@ -2276,7 +2295,7 @@ boolean checkSettings(File file, String path) throws IOException {
22762295
Long actVersion = settings.getAnalyzerVersion(fileTypeName);
22772296
if (actVersion == null || !actVersion.equals(reqVersion)) {
22782297
if (LOGGER.isLoggable(Level.FINE)) {
2279-
LOGGER.log(Level.FINE, "{0} version mismatch: {1}",
2298+
LOGGER.log(Level.FINE, "{0} version mismatch: ''{1}''",
22802299
new Object[]{fileTypeName, path});
22812300
}
22822301
return false;
@@ -2290,14 +2309,14 @@ boolean checkSettings(File file, String path) throws IOException {
22902309
break;
22912310
}
22922311
if (n < 1) {
2293-
LOGGER.log(Level.FINER, "Missing index Documents: {0}", path);
2312+
LOGGER.log(Level.FINER, "Missing index Documents: ''{0}''", path);
22942313
return false;
22952314
}
22962315

22972316
// If the economy mode is on, this should be treated as a match.
22982317
if (!env.isGenerateHtml()) {
22992318
if (xrefExistsFor(path)) {
2300-
LOGGER.log(Level.FINEST, "Extraneous {0} , removing its xref file", path);
2319+
LOGGER.log(Level.FINEST, "Extraneous ''{0}'' , removing its xref file", path);
23012320
removeXrefFile(path);
23022321
}
23032322
return true;

0 commit comments

Comments
 (0)