19
19
20
20
/*
21
21
* Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
22
+ * Portions Copyright (c) 2017, Chris Fraire <[email protected] >.
22
23
*/
23
24
package org .opensolaris .opengrok .analysis ;
24
25
30
31
import java .io .OutputStreamWriter ;
31
32
import java .io .StringReader ;
32
33
import java .util .ArrayList ;
33
- import java .util .EnumMap ;
34
34
import java .util .List ;
35
35
import java .util .logging .Level ;
36
36
import java .util .logging .Logger ;
37
37
import org .opensolaris .opengrok .configuration .RuntimeEnvironment ;
38
38
import org .opensolaris .opengrok .logger .LoggerFactory ;
39
39
import org .opensolaris .opengrok .util .IOUtils ;
40
- import org .opensolaris .opengrok .util .Interner ;
41
40
42
41
/**
43
42
* Provides Ctags by having a running instance of ctags
@@ -57,10 +56,6 @@ public class Ctags {
57
56
private String CTagsExtraOptionsFile = null ;
58
57
private ProcessBuilder processBuilder ;
59
58
60
- private final int MIN_METHOD_LINE_LENGTH = 6 ; //this means basically empty method body in tags, so skip it
61
- private final int MAX_METHOD_LINE_LENGTH = 1030 ; //96 is used by universal ctags for some lines, but it's too low, OpenGrok can theoretically handle 50000 with 8G heap
62
- // also this might break scopes functionality, if set too low
63
-
64
59
private boolean junit_testing = false ;
65
60
66
61
public void setBinary (String binary ) {
@@ -333,8 +328,9 @@ public Definitions doCtags(String file) throws IOException {
333
328
//log.fine("doing >" + file + "<");
334
329
ctagsIn .write (file );
335
330
ctagsIn .flush ();
336
- ret = new Definitions ();
337
- readTags (ret );
331
+ CtagsReader rdr = new CtagsReader ();
332
+ readTags (rdr );
333
+ ret = rdr .getDefinitions ();
338
334
}
339
335
340
336
return ret ;
@@ -381,71 +377,13 @@ public void destroy() {
381
377
}
382
378
};
383
379
384
- Definitions ret ;
385
- ret = new Definitions ( );
386
- readTags ( ret );
380
+ CtagsReader rdr = new CtagsReader () ;
381
+ readTags ( rdr );
382
+ Definitions ret = rdr . getDefinitions ( );
387
383
return ret ;
388
384
}
389
385
390
/**
 * Names of the ctags extension fields that the tag reader consumes.
 * <p>
 * This should mimic
 * https://github.com/universal-ctags/ctags/blob/master/docs/format.rst
 * or http://ctags.sourceforge.net/FORMAT (for backwards compatibility).
 * <p>
 * Uncomment only those constants that are actually used, to avoid
 * populating the field map for every record.
 */
public enum tagFields {
    // ARITY("arity"),
    CLASS("class"),
    // INHERIT("inherit"), //this is not defined in above format docs, but both universal and exuberant ctags use it
    // INTERFACE("interface"), //this is not defined in above format docs, but both universal and exuberant ctags use it
    // ENUM("enum"),
    // FILE("file"),
    // FUNCTION("function"),
    // KIND("kind"),
    LINE("line"),
    // NAMESPACE("namespace"), //this is not defined in above format docs, but both universal and exuberant ctags use it
    // PROGRAM("program"), //this is not defined in above format docs, but both universal and exuberant ctags use it
    SIGNATURE("signature");
    // STRUCT("struct"),
    // TYPEREF("typeref"),
    // UNION("union");

    // NOTE: if you edit the constants above, always re-check charCmpEndOffset.

    /**
     * Cached constant array: {@code values()} clones its backing array on
     * every call, and {@link #quickValueOf(String)} runs once per field.
     */
    private static final tagFields[] ALL = values();

    /** Field name as it appears before the ':' in a tag line. */
    private final String name;

    tagFields(String name) {
        this.name = name;
    }

    /**
     * Index of the last character that is compared between an input field
     * and a constant's name: 0 compares only the first character, 1 compares
     * the first two, and so on. It must be large enough to tell the enabled
     * constants apart (0 suffices for "class", "line" and "signature").
     * <p>
     * NOTE: this is a deliberate speed/validation trade-off. Only a prefix is
     * compared, so an unexpected field sharing that prefix with an enabled
     * constant (e.g. "find" against "file" if FILE were enabled) is taken for
     * that constant and its value is used. No validation of the input happens.
     * <p>
     * Make this MAX. 8 chars! (backwards compat to DOS/Win)
     */
    public static int charCmpEndOffset = 0;

    /**
     * Quickly determines whether a field matches one of the consumed names.
     * <p>
     * At most {@code charCmpEndOffset + 1} leading characters are compared,
     * clamped to the length of each constant's name so that a large offset
     * cannot index past the end of a short name. An input that is
     * {@code null}, empty, or shorter than the compared prefix does not
     * match; previously the empty and too-short cases raised
     * {@code StringIndexOutOfBoundsException}.
     *
     * @param fullName the raw field text, e.g. {@code "line:12"}
     * @return the first constant whose name prefix matches, or {@code null}
     * if none does
     */
    public static tagFields quickValueOf(String fullName) {
        if (fullName == null) {
            return null;
        }
        final int wanted = charCmpEndOffset + 1;
        for (tagFields candidate : ALL) {
            int len = Math.min(wanted, candidate.name.length());
            // regionMatches yields false (instead of throwing) when fullName
            // is shorter than the compared prefix.
            if (candidate.name.regionMatches(0, fullName, 0, len)) {
                return candidate;
            }
        }
        return null;
    }
}
446
-
447
- private void readTags (Definitions defs ) {
448
- EnumMap <tagFields , String > fields = new EnumMap <>(tagFields .class );
386
+ private void readTags (CtagsReader reader ) {
449
387
try {
450
388
do {
451
389
String tagLine = ctagsOut .readLine ();
@@ -476,139 +414,11 @@ private void readTags(Definitions defs) {
476
414
return ;
477
415
}
478
416
479
- int p = tagLine .indexOf ('\t' );
480
- if (p <= 0 ) {
481
- //log.fine("SKIPPING LINE - NO TAB");
482
- continue ;
483
- }
484
- String def = tagLine .substring (0 , p );
485
- int mstart = tagLine .indexOf ('\t' , p + 1 );
486
-
487
- String kind = null ;
488
-
489
- int lp = tagLine .length ();
490
- while ((p = tagLine .lastIndexOf ('\t' , lp - 1 )) > 0 ) {
491
- //log.fine(" p = " + p + " lp = " + lp);
492
- String fld = tagLine .substring (p + 1 , lp );
493
- //log.fine("FIELD===" + fld);
494
- lp = p ;
495
-
496
- int sep = fld .indexOf (':' );
497
- if (sep != -1 ) {
498
- tagFields pos = tagFields .quickValueOf (fld );
499
- if (pos != null ) {
500
- String val = fld .substring (sep + 1 );
501
- fields .put (pos , val );
502
- } else {
503
- //unknown field name
504
- //don't log on purpose, since we don't consume all possible fields, so just ignore this error for now
505
- // LOGGER.log(Level.WARNING, "Unknown field name found: {0}", fld.substring(0, sep - 1));
506
- }
507
- } else {
508
- //TODO no separator, assume this is the kind
509
- kind = fld ;
510
- break ;
511
- }
512
- }
513
-
514
- String lnum = fields .get (tagFields .LINE );
515
- String signature = fields .get (tagFields .SIGNATURE );
516
- String classInher = fields .get (tagFields .CLASS );
517
-
518
- final String match ;
519
- int mlength = p - mstart ;
520
- if ((p > 0 ) && (mlength > MIN_METHOD_LINE_LENGTH )) {
521
- if (mlength < MAX_METHOD_LINE_LENGTH ) {
522
- match = tagLine .substring (mstart + 3 , p - 4 ).
523
- replace ("\\ /" , "/" ).replaceAll ("[ \t ]+" , " " ); //TODO per format we should also recognize \r and \n and \\
524
- } else {
525
- LOGGER .log (Level .FINEST , "Ctags: stripping method body for def {0} line {1}(scopes/highlight might break)" , new Object []{def , lnum });
526
- match = tagLine .substring (mstart + 3 , mstart + MAX_METHOD_LINE_LENGTH - 1 ). // +3 - 4 = -1
527
- replace ("\\ /" , "/" ).replaceAll ("[ \t ]+" , " " );
528
- }
529
- } else { //tag is in wrong format, cannot extract tagaddress from it, skip
530
- continue ;
531
- }
532
-
533
- // Bug #809: Keep track of which symbols have already been
534
- // seen to prevent duplicating them in memory.
535
- final Interner <String > seenSymbols = new Interner <>();
536
-
537
- final String type
538
- = classInher == null ? kind : kind + " in " + classInher ;
539
- addTag (defs , seenSymbols , lnum , def , type , match , classInher , signature );
540
- if (signature != null ) {
541
- //TODO if some languages use different character for separating arguments, below needs to be adjusted
542
- String [] args = signature .split ("," );
543
- for (String arg : args ) {
544
- //TODO this algorithm assumes that data types occur to
545
- // the left of the argument name, so it will not
546
- // work for languages like rust, kotlin, etc. which
547
- // place the data type to the right of the argument name.
548
- // Need an attribute from ctags to indicate data type location.
549
- // ----------------------------------------------------------------
550
- // When no assignment of default values,
551
- // expecting: <type> <name>, or <name>
552
- //
553
- // When default value assignment applied to parameter,
554
- // expecting: <type> <name> = <value> or
555
- // <name> = <value>
556
- // (Note whitespace content made irrelevant)
557
-
558
- // Need to ditch the default assignment value
559
- // so that the extraction loop below will work.
560
- // This assumes all languages use '=' to assign value.
561
-
562
- if (arg .indexOf ("=" ) != -1 ) {
563
- String [] a = arg .split ("=" );
564
- arg = a [0 ]; // throws away assigned value
565
- }
566
-
567
- // Strip out all non 'word' class symbols
568
- // which leaves just names intact.
569
- String [] names = arg .trim ().split ("[\\ W]" );
570
- String name ;
571
-
572
- // Walk the array backwards from the end and
573
- // the parameter name should always be the first
574
- // non-empty element encountered.
575
- for (int ii =names .length -1 ; ii >= 0 ; ii --) {
576
- name = names [ii ];
577
- if (name .length () > 0 ) {
578
- addTag (defs , seenSymbols , lnum , name , "argument" ,
579
- def .trim () + signature .trim (), null , signature );
580
- break ;
581
- }
582
- }
583
- }
584
- }
585
- //log.fine("Read = " + def + " : " + lnum + " = " + kind + " IS " + inher + " M " + match);
586
- fields .clear ();
417
+ reader .readLine (tagLine );
587
418
} while (true );
588
419
} catch (Exception e ) {
589
420
LOGGER .log (Level .WARNING , "CTags parsing problem: " , e );
590
421
}
591
422
LOGGER .severe ("CTag reader cycle was interrupted!" );
592
423
}
593
-
594
- /**
595
- * Add a tag to a {@code Definitions} instance.
596
- */
597
- private void addTag (Definitions defs , Interner <String > seenSymbols ,
598
- String lnum , String symbol , String type , String text , String namespace , String signature ) {
599
- // The strings are frequently repeated (a symbol can be used in
600
- // multiple definitions, multiple definitions can have the same type,
601
- // one line can contain multiple definitions). Intern them to minimize
602
- // the space consumed by them (see bug #809).
603
- int lineno = 0 ;
604
- try {
605
- lineno = Integer .parseInt (lnum );
606
- } catch (NumberFormatException nfe ) {
607
- LOGGER .log (Level .WARNING , "CTags line number parsing problem(but I will continue with line # 0) for symbol {0}" , symbol );
608
- }
609
- defs .addTag (lineno , seenSymbols .intern (symbol .trim ()),
610
- seenSymbols .intern (type .trim ()), seenSymbols .intern (text .trim ()),
611
- namespace == null ? null : seenSymbols .intern (namespace .trim ()), signature );
612
-
613
- }
614
424
}
0 commit comments