15
15
package com .google .googlejavaformat .java ;
16
16
17
17
import static com .google .common .base .Preconditions .checkArgument ;
18
- import static com .google .common .base .Preconditions .checkElementIndex ;
19
- import static java .util .Arrays .stream ;
20
18
21
19
import com .google .common .collect .ImmutableList ;
22
20
import com .google .common .collect .Lists ;
29
27
import com .sun .tools .javac .parser .Tokens .TokenKind ;
30
28
import com .sun .tools .javac .parser .UnicodeReader ;
31
29
import com .sun .tools .javac .util .Context ;
32
- import java .lang .reflect .Method ;
33
- import java .util .ArrayList ;
34
- import java .util .Collections ;
35
- import java .util .Comparator ;
36
- import java .util .HashSet ;
37
- import java .util .List ;
38
- import java .util .Objects ;
39
30
import java .util .Set ;
40
- import org .jspecify .annotations .Nullable ;
41
31
42
32
/** A wrapper around javac's lexer. */
43
33
final class JavacTokens {
@@ -54,8 +44,6 @@ static class RawTok {
54
44
private final int endPos ;
55
45
56
46
RawTok (String stringVal , TokenKind kind , int pos , int endPos ) {
57
- checkElementIndex (pos , endPos , "pos" );
58
- checkArgument (pos < endPos , "expected pos (%s) < endPos (%s)" , pos , endPos );
59
47
this .stringVal = stringVal ;
60
48
this .kind = kind ;
61
49
this .pos = pos ;
@@ -83,88 +71,30 @@ public String stringVal() {
83
71
}
84
72
}
85
73
86
- private static final TokenKind STRINGFRAGMENT =
87
- stream (TokenKind .values ())
88
- .filter (t -> t .name ().contentEquals ("STRINGFRAGMENT" ))
89
- .findFirst ()
90
- .orElse (null );
91
-
92
- static boolean isStringFragment (TokenKind kind ) {
93
- return STRINGFRAGMENT != null && Objects .equals (kind , STRINGFRAGMENT );
94
- }
95
-
96
- private static ImmutableList <Token > readAllTokens (
97
- String source , Context context , Set <Integer > nonTerminalStringFragments ) {
74
+ /** Lex the input and return a list of {@link RawTok}s. */
75
+ public static ImmutableList <RawTok > getTokens (
76
+ String source , Context context , Set <TokenKind > stopTokens ) {
98
77
if (source == null ) {
99
78
return ImmutableList .of ();
100
79
}
101
80
ScannerFactory fac = ScannerFactory .instance (context );
102
81
char [] buffer = (source + EOF_COMMENT ).toCharArray ();
103
82
Scanner scanner =
104
83
new AccessibleScanner (fac , new CommentSavingTokenizer (fac , buffer , buffer .length ));
105
- List <Token > tokens = new ArrayList <>();
106
- do {
107
- scanner .nextToken ();
108
- tokens .add (scanner .token ());
109
- } while (scanner .token ().kind != TokenKind .EOF );
110
- for (int i = 0 ; i < tokens .size (); i ++) {
111
- if (isStringFragment (tokens .get (i ).kind )) {
112
- int start = i ;
113
- while (isStringFragment (tokens .get (i ).kind )) {
114
- i ++;
115
- }
116
- for (int j = start ; j < i - 1 ; j ++) {
117
- nonTerminalStringFragments .add (tokens .get (j ).pos );
118
- }
119
- }
120
- }
121
- // A string template is tokenized as a series of STRINGFRAGMENT tokens containing the string
122
- // literal values, followed by the tokens for the template arguments. For the formatter, we
123
- // want the stream of tokens to appear in order by their start position.
124
- if (Runtime .version ().feature () >= 21 ) {
125
- Collections .sort (tokens , Comparator .comparingInt (t -> t .pos ));
126
- }
127
- return ImmutableList .copyOf (tokens );
128
- }
129
-
130
- /** Lex the input and return a list of {@link RawTok}s. */
131
- public static ImmutableList <RawTok > getTokens (
132
- String source , Context context , Set <TokenKind > stopTokens ) {
133
- if (source == null ) {
134
- return ImmutableList .of ();
135
- }
136
- Set <Integer > nonTerminalStringFragments = new HashSet <>();
137
- ImmutableList <Token > javacTokens = readAllTokens (source , context , nonTerminalStringFragments );
138
-
139
84
ImmutableList .Builder <RawTok > tokens = ImmutableList .builder ();
140
85
int end = source .length ();
141
86
int last = 0 ;
142
- for (Token t : javacTokens ) {
87
+ do {
88
+ scanner .nextToken ();
89
+ Token t = scanner .token ();
143
90
if (t .comments != null ) {
144
- // javac accumulates comments in reverse order
145
91
for (Comment c : Lists .reverse (t .comments )) {
146
- int pos = c .getSourcePos (0 );
147
- int length ;
148
- if (pos == -1 ) {
149
- // We've found a comment whose position hasn't been recorded. Deduce its position as the
150
- // first `/` character after the end of the previous token.
151
- //
152
- // javac creates a new JavaTokenizer to process string template arguments, so
153
- // CommentSavingTokenizer doesn't get a chance to preprocess those comments and save
154
- // their text and positions.
155
- //
156
- // TODO: consider always using this approach once the minimum supported JDK is 16 and
157
- // we can assume BasicComment#getRawCharacters is always available.
158
- pos = source .indexOf ('/' , last );
159
- length = CommentSavingTokenizer .commentLength (c );
160
- } else {
161
- length = c .getText ().length ();
92
+ if (last < c .getSourcePos (0 )) {
93
+ tokens .add (new RawTok (null , null , last , c .getSourcePos (0 )));
162
94
}
163
- if (last < pos ) {
164
- tokens .add (new RawTok (null , null , last , pos ));
165
- }
166
- tokens .add (new RawTok (null , null , pos , pos + length ));
167
- last = pos + length ;
95
+ tokens .add (
96
+ new RawTok (null , null , c .getSourcePos (0 ), c .getSourcePos (0 ) + c .getText ().length ()));
97
+ last = c .getSourcePos (0 ) + c .getText ().length ();
168
98
}
169
99
}
170
100
if (stopTokens .contains (t .kind )) {
@@ -176,25 +106,14 @@ public static ImmutableList<RawTok> getTokens(
176
106
if (last < t .pos ) {
177
107
tokens .add (new RawTok (null , null , last , t .pos ));
178
108
}
179
- if (isStringFragment (t .kind )) {
180
- int endPos = t .endPos ;
181
- int pos = t .pos ;
182
- if (nonTerminalStringFragments .contains (t .pos )) {
183
- // Include the \ escape from \{...} in the preceding string fragment
184
- endPos ++;
185
- }
186
- tokens .add (new RawTok (source .substring (pos , endPos ), t .kind , pos , endPos ));
187
- last = endPos ;
188
- } else {
189
- tokens .add (
190
- new RawTok (
191
- t .kind == TokenKind .STRINGLITERAL ? "\" " + t .stringVal () + "\" " : null ,
192
- t .kind ,
193
- t .pos ,
194
- t .endPos ));
195
- last = t .endPos ;
196
- }
197
- }
109
+ tokens .add (
110
+ new RawTok (
111
+ t .kind == TokenKind .STRINGLITERAL ? "\" " + t .stringVal () + "\" " : null ,
112
+ t .kind ,
113
+ t .pos ,
114
+ t .endPos ));
115
+ last = t .endPos ;
116
+ } while (scanner .token ().kind != TokenKind .EOF );
198
117
if (last < end ) {
199
118
tokens .add (new RawTok (null , null , last , end ));
200
119
}
@@ -203,32 +122,6 @@ public static ImmutableList<RawTok> getTokens(
203
122
204
123
/** A {@link JavaTokenizer} that saves comments. */
205
124
static class CommentSavingTokenizer extends JavaTokenizer {
206
-
207
- private static final Method GET_RAW_CHARACTERS_METHOD = getRawCharactersMethod ();
208
-
209
- private static @ Nullable Method getRawCharactersMethod () {
210
- try {
211
- // This is a method in PositionTrackingReader, but that class is not public.
212
- return BasicComment .class .getMethod ("getRawCharacters" );
213
- } catch (NoSuchMethodException e ) {
214
- return null ;
215
- }
216
- }
217
-
218
- static int commentLength (Comment comment ) {
219
- if (comment instanceof BasicComment && GET_RAW_CHARACTERS_METHOD != null ) {
220
- // If we've seen a BasicComment instead of a CommentWithTextAndPosition, getText() will
221
- // be null, so we deduce the length using getRawCharacters. See also the comment at the
222
- // usage of this method in getTokens.
223
- try {
224
- return ((char []) GET_RAW_CHARACTERS_METHOD .invoke (((BasicComment ) comment ))).length ;
225
- } catch (ReflectiveOperationException e ) {
226
- throw new LinkageError (e .getMessage (), e );
227
- }
228
- }
229
- return comment .getText ().length ();
230
- }
231
-
232
125
CommentSavingTokenizer (ScannerFactory fac , char [] buffer , int length ) {
233
126
super (fac , buffer , length );
234
127
}
0 commit comments