Skip to content

Commit 0cc7e17

Browse files
committed
Extract CtagsReader from Ctags to support testing
1 parent 133a606 commit 0cc7e17

File tree

2 files changed

+297
-199
lines changed

2 files changed

+297
-199
lines changed

src/org/opensolaris/opengrok/analysis/Ctags.java

+9-199
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
22+
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
2223
*/
2324
package org.opensolaris.opengrok.analysis;
2425

@@ -30,14 +31,12 @@
3031
import java.io.OutputStreamWriter;
3132
import java.io.StringReader;
3233
import java.util.ArrayList;
33-
import java.util.EnumMap;
3434
import java.util.List;
3535
import java.util.logging.Level;
3636
import java.util.logging.Logger;
3737
import org.opensolaris.opengrok.configuration.RuntimeEnvironment;
3838
import org.opensolaris.opengrok.logger.LoggerFactory;
3939
import org.opensolaris.opengrok.util.IOUtils;
40-
import org.opensolaris.opengrok.util.Interner;
4140

4241
/**
4342
* Provides Ctags by having a running instance of ctags
@@ -57,10 +56,6 @@ public class Ctags {
5756
private String CTagsExtraOptionsFile = null;
5857
private ProcessBuilder processBuilder;
5958

60-
private final int MIN_METHOD_LINE_LENGTH = 6; //this means basically empty method body in tags, so skip it
61-
private final int MAX_METHOD_LINE_LENGTH = 1030; //96 is used by universal ctags for some lines, but it's too low, OpenGrok can theoretically handle 50000 with 8G heap
62-
// also this might break scopes functionality, if set too low
63-
6459
private boolean junit_testing = false;
6560

6661
public void setBinary(String binary) {
@@ -333,8 +328,9 @@ public Definitions doCtags(String file) throws IOException {
333328
//log.fine("doing >" + file + "<");
334329
ctagsIn.write(file);
335330
ctagsIn.flush();
336-
ret = new Definitions();
337-
readTags(ret);
331+
CtagsReader rdr = new CtagsReader();
332+
readTags(rdr);
333+
ret = rdr.getDefinitions();
338334
}
339335

340336
return ret;
@@ -381,71 +377,13 @@ public void destroy() {
381377
}
382378
};
383379

384-
Definitions ret;
385-
ret = new Definitions();
386-
readTags(ret);
380+
CtagsReader rdr = new CtagsReader();
381+
readTags(rdr);
382+
Definitions ret = rdr.getDefinitions();
387383
return ret;
388384
}
389385

390-
// this should mimic https://github.com/universal-ctags/ctags/blob/master/docs/format.rst
391-
// or http://ctags.sourceforge.net/FORMAT (for backwards compatibility)
392-
//uncomment only those that are used ... (to avoid populating the hashmap for every record)
393-
public enum tagFields {
394-
// ARITY("arity"),
395-
CLASS("class"),
396-
// INHERIT("inherit"), //this is not defined in above format docs, but both universal and exuberant ctags use it
397-
// INTERFACE("interface"), //this is not defined in above format docs, but both universal and exuberant ctags use it
398-
// ENUM("enum"),
399-
// FILE("file"),
400-
// FUNCTION("function"),
401-
// KIND("kind"),
402-
LINE("line"),
403-
// NAMESPACE("namespace"), //this is not defined in above format docs, but both universal and exuberant ctags use it
404-
// PROGRAM("program"), //this is not defined in above format docs, but both universal and exuberant ctags use it
405-
SIGNATURE("signature");
406-
// STRUCT("struct"),
407-
// TYPEREF("typeref"),
408-
// UNION("union");
409-
410-
//NOTE: if you edit above, always consult below charCmpEndOffset
411-
private final String name;
412-
413-
tagFields(String name) {
414-
this.name = name;
415-
}
416-
417-
//this is very important: to save time, we only compare this many leading chars of each field name with the input,
418-
//this number has to be large enough to avoid ambiguity between field names (so currently 2 characters)
419-
//TODO:
420-
//NOTE this is a big tradeoff in terms of input data, e.g. field "find"
421-
//will be considered "file" and overwrite the value, so if ctags sends us buggy input
422-
//we will output buggy data TOO!
423-
//NO VALIDATION of the input happens - but then we gain LOTS of speed, because we do not fully compare the same field names again and again
424-
// 1 - means only the first 2 chars are compared
425-
public static int charCmpEndOffset = 0; // make this MAX. 8 chars! (backwards compat to DOS/Win )
426-
427-
//quickly check whether the field name matches one of the allowed/consumed ones
428-
public static Ctags.tagFields quickValueOf(String fullName) {
429-
int i;
430-
boolean match;
431-
for (tagFields x : tagFields.values()) {
432-
match = true;
433-
for (i = 0; i <= charCmpEndOffset; i++) {
434-
if (x.name.charAt(i) != fullName.charAt(i)) {
435-
match = false;
436-
break;
437-
}
438-
}
439-
if (match) {
440-
return x;
441-
}
442-
}
443-
return null;
444-
}
445-
}
446-
447-
private void readTags(Definitions defs) {
448-
EnumMap<tagFields, String> fields = new EnumMap<>(tagFields.class);
386+
private void readTags(CtagsReader reader) {
449387
try {
450388
do {
451389
String tagLine = ctagsOut.readLine();
@@ -476,139 +414,11 @@ private void readTags(Definitions defs) {
476414
return;
477415
}
478416

479-
int p = tagLine.indexOf('\t');
480-
if (p <= 0) {
481-
//log.fine("SKIPPING LINE - NO TAB");
482-
continue;
483-
}
484-
String def = tagLine.substring(0, p);
485-
int mstart = tagLine.indexOf('\t', p + 1);
486-
487-
String kind = null;
488-
489-
int lp = tagLine.length();
490-
while ((p = tagLine.lastIndexOf('\t', lp - 1)) > 0) {
491-
//log.fine(" p = " + p + " lp = " + lp);
492-
String fld = tagLine.substring(p + 1, lp);
493-
//log.fine("FIELD===" + fld);
494-
lp = p;
495-
496-
int sep = fld.indexOf(':');
497-
if (sep != -1) {
498-
tagFields pos = tagFields.quickValueOf(fld);
499-
if (pos != null) {
500-
String val = fld.substring(sep + 1);
501-
fields.put(pos, val);
502-
} else {
503-
//unknown field name
504-
//don't log on purpose, since we don't consume all possible fields, so just ignore this error for now
505-
// LOGGER.log(Level.WARNING, "Unknown field name found: {0}", fld.substring(0, sep - 1));
506-
}
507-
} else {
508-
//TODO no separator, assume this is the kind
509-
kind = fld;
510-
break;
511-
}
512-
}
513-
514-
String lnum = fields.get(tagFields.LINE);
515-
String signature = fields.get(tagFields.SIGNATURE);
516-
String classInher = fields.get(tagFields.CLASS);
517-
518-
final String match;
519-
int mlength = p - mstart;
520-
if ((p > 0) && (mlength > MIN_METHOD_LINE_LENGTH)) {
521-
if (mlength < MAX_METHOD_LINE_LENGTH) {
522-
match = tagLine.substring(mstart + 3, p - 4).
523-
replace("\\/", "/").replaceAll("[ \t]+", " "); //TODO per format we should also recognize \r and \n and \\
524-
} else {
525-
LOGGER.log(Level.FINEST, "Ctags: stripping method body for def {0} line {1}(scopes/highlight might break)", new Object[]{def, lnum});
526-
match = tagLine.substring(mstart + 3, mstart + MAX_METHOD_LINE_LENGTH - 1). // +3 - 4 = -1
527-
replace("\\/", "/").replaceAll("[ \t]+", " ");
528-
}
529-
} else { //tag is in wrong format, cannot extract tagaddress from it, skip
530-
continue;
531-
}
532-
533-
// Bug #809: Keep track of which symbols have already been
534-
// seen to prevent duplicating them in memory.
535-
final Interner<String> seenSymbols = new Interner<>();
536-
537-
final String type
538-
= classInher == null ? kind : kind + " in " + classInher;
539-
addTag(defs, seenSymbols, lnum, def, type, match, classInher, signature);
540-
if (signature != null) {
541-
//TODO if some languages use a different character for separating arguments, the code below needs to be adjusted
542-
String[] args = signature.split(",");
543-
for (String arg : args) {
544-
//TODO this algorithm assumes that data types occur to
545-
// the left of the argument name, so it will not
546-
// work for languages like rust, kotlin, etc. which
547-
// place the data type to the right of the argument name.
548-
// Need an attribute from ctags to indicate data type location.
549-
// ----------------------------------------------------------------
550-
// When no assignment of default values,
551-
// expecting: <type> <name>, or <name>
552-
//
553-
// When default value assignment applied to parameter,
554-
// expecting: <type> <name> = <value> or
555-
// <name> = <value>
556-
// (Note whitespace content made irrelevant)
557-
558-
// Need to ditch the default assignment value
559-
// so that the extraction loop below will work.
560-
// This assumes all languages use '=' to assign value.
561-
562-
if (arg.indexOf("=") != -1) {
563-
String[] a = arg.split("=");
564-
arg = a[0]; // throws away assigned value
565-
}
566-
567-
// Strip out all non 'word' class symbols
568-
// which leaves just names intact.
569-
String [] names = arg.trim().split("[\\W]");
570-
String name;
571-
572-
// Walk the array backwards from the end and
573-
// the parameter name should always be the first
574-
// non-empty element encountered.
575-
for (int ii=names.length-1; ii >= 0; ii--) {
576-
name = names[ii];
577-
if (name.length() > 0) {
578-
addTag(defs, seenSymbols, lnum, name, "argument",
579-
def.trim() + signature.trim(), null, signature);
580-
break;
581-
}
582-
}
583-
}
584-
}
585-
//log.fine("Read = " + def + " : " + lnum + " = " + kind + " IS " + inher + " M " + match);
586-
fields.clear();
417+
reader.readLine(tagLine);
587418
} while (true);
588419
} catch (Exception e) {
589420
LOGGER.log(Level.WARNING, "CTags parsing problem: ", e);
590421
}
591422
LOGGER.severe("CTag reader cycle was interrupted!");
592423
}
593-
594-
/**
595-
* Add a tag to a {@code Definitions} instance.
596-
*/
597-
private void addTag(Definitions defs, Interner<String> seenSymbols,
598-
String lnum, String symbol, String type, String text, String namespace, String signature) {
599-
// The strings are frequently repeated (a symbol can be used in
600-
// multiple definitions, multiple definitions can have the same type,
601-
// one line can contain multiple definitions). Intern them to minimize
602-
// the space consumed by them (see bug #809).
603-
int lineno = 0;
604-
try {
605-
lineno = Integer.parseInt(lnum);
606-
} catch (NumberFormatException nfe) {
607-
LOGGER.log(Level.WARNING, "CTags line number parsing problem(but I will continue with line # 0) for symbol {0}", symbol);
608-
}
609-
defs.addTag(lineno, seenSymbols.intern(symbol.trim()),
610-
seenSymbols.intern(type.trim()), seenSymbols.intern(text.trim()),
611-
namespace == null ? null : seenSymbols.intern(namespace.trim()), signature);
612-
613-
}
614424
}

0 commit comments

Comments
 (0)