diff --git a/src/org/opensolaris/opengrok/analysis/Ctags.java b/src/org/opensolaris/opengrok/analysis/Ctags.java index ab438d8acac..e07229da343 100644 --- a/src/org/opensolaris/opengrok/analysis/Ctags.java +++ b/src/org/opensolaris/opengrok/analysis/Ctags.java @@ -36,6 +36,10 @@ import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; +import org.opensolaris.opengrok.analysis.AnalyzerGuru; +import org.opensolaris.opengrok.analysis.FileAnalyzerFactory; +import org.opensolaris.opengrok.analysis.fortran.FortranAnalyzer; +import org.opensolaris.opengrok.analysis.fortran.FortranAnalyzerFactory; import org.opensolaris.opengrok.configuration.RuntimeEnvironment; import org.opensolaris.opengrok.logger.LoggerFactory; import org.opensolaris.opengrok.util.IOUtils; @@ -380,6 +384,10 @@ public Definitions doCtags(String file) throws IOException, CtagsReader rdr = new CtagsReader(); rdr.setSplitterSupplier(() -> { return trySplitSource(file); }); rdr.setTabSize(tabSize); + FileAnalyzerFactory factory = AnalyzerGuru.find(file); + if (factory instanceof FortranAnalyzerFactory) { + rdr.setNormalizeIdentifier(FortranAnalyzer::normalizeIdentifier); + } Definitions ret; try { ctagsIn.write(file + "\n"); diff --git a/src/org/opensolaris/opengrok/analysis/CtagsReader.java b/src/org/opensolaris/opengrok/analysis/CtagsReader.java index b04a3f13a71..bcd1c0c8aca 100644 --- a/src/org/opensolaris/opengrok/analysis/CtagsReader.java +++ b/src/org/opensolaris/opengrok/analysis/CtagsReader.java @@ -25,6 +25,7 @@ package org.opensolaris.opengrok.analysis; import java.util.EnumMap; +import java.util.function.Function; import java.util.function.Supplier; import java.util.logging.Level; import java.util.logging.Logger; @@ -93,6 +94,12 @@ public class CtagsReader { private int tabSize; + private Function normalizeIdentifier = str -> str; + + public void setNormalizeIdentifier(Function normalize) { + this.normalizeIdentifier = normalize; + } + /** * This should mimic * https://github.com/universal-ctags/ctags/blob/master/docs/format.rst or @@ -141,7 +148,7 @@ public enum tagFields { * VALIDATION happens of input - but then we gain LOTS of speed, due to * not comparing the same field names again and again fully. */ - public static int charCmpEndOffset = 0; + public static int charCmpEndOffset = 1; // Need to distinguish FORTRAN's subroutine from signature /** * Quickly get if the field name matches allowed/consumed ones @@ -279,8 +286,10 @@ public void readLine(String tagLine) { " I will continue with line # 0) for symbol {0}", def); } + // NOTE: bestIndexOfTag searches the source, so it needs the + // precise, non-normalized symbol. CpatIndex cidx = bestIndexOfTag(lineno, whole, def); - addTag(defs, cidx.lineno, def, type, match, classInher, signature, + addTag(defs, cidx.lineno, normalizeIdentifier.apply(def), type, match, classInher, signature, cidx.lineStart, cidx.lineEnd); String[] args; @@ -326,7 +335,7 @@ public void readLine(String tagLine) { name = arg; } if (name != null) { - addTag(defs, cidx.lineno, name, "argument", def.trim() + + addTag(defs, cidx.lineno, normalizeIdentifier.apply(name), "argument", def.trim() + signature.trim(), null, signature, cidx.lineStart, cidx.lineEnd); } else { diff --git a/src/org/opensolaris/opengrok/analysis/JFlexNonXref.java b/src/org/opensolaris/opengrok/analysis/JFlexNonXref.java index fd5fb98add9..79ab6c30a24 100644 --- a/src/org/opensolaris/opengrok/analysis/JFlexNonXref.java +++ b/src/org/opensolaris/opengrok/analysis/JFlexNonXref.java @@ -473,7 +473,7 @@ protected boolean writeSymbol(String symbol, Set keywords, int line, protected boolean writeSymbol(String symbol, Set keywords, int line, boolean caseSensitive, boolean isKeyword) throws IOException { return JFlexXrefUtils.writeSymbol(out, defs, urlPrefix, project, - symbol, keywords, line, caseSensitive, isKeyword); + symbol, symbol, keywords, line, caseSensitive, isKeyword); } /** diff --git a/src/org/opensolaris/opengrok/analysis/JFlexSymbolMatcher.java b/src/org/opensolaris/opengrok/analysis/JFlexSymbolMatcher.java index 41e23ce06a9..ee7ea5e6766 100644 --- a/src/org/opensolaris/opengrok/analysis/JFlexSymbolMatcher.java +++ b/src/org/opensolaris/opengrok/analysis/JFlexSymbolMatcher.java @@ -38,6 +38,8 @@ public abstract class JFlexSymbolMatcher extends JFlexStateStacker private NonSymbolMatchedListener nonSymbolListener; private String disjointSpanClassName; + public String normalizeIdentifier(String id) { return id; } + /** * Associates the specified listener, replacing the former one. * @param l defined instance @@ -97,7 +99,7 @@ protected String getDisjointSpanClassName() { protected void onSymbolMatched(String str, int start) { SymbolMatchedListener l = symbolListener; if (l != null) { - SymbolMatchedEvent evt = new SymbolMatchedEvent(this, str, start, + SymbolMatchedEvent evt = new SymbolMatchedEvent(this, str, normalizeIdentifier(str), start, start + str.length()); l.symbolMatched(evt); } diff --git a/src/org/opensolaris/opengrok/analysis/JFlexTokenizer.java b/src/org/opensolaris/opengrok/analysis/JFlexTokenizer.java index b6a3f2f03e3..fd4c3d7c39a 100644 --- a/src/org/opensolaris/opengrok/analysis/JFlexTokenizer.java +++ b/src/org/opensolaris/opengrok/analysis/JFlexTokenizer.java @@ -105,7 +105,7 @@ public final boolean incrementToken() throws IOException { */ @Override public void symbolMatched(SymbolMatchedEvent evt) { - setAttribs(evt.getStr(), evt.getStart(), evt.getEnd()); + setAttribs(evt.getNormalizedStr(), evt.getStart(), evt.getEnd()); } /** diff --git a/src/org/opensolaris/opengrok/analysis/JFlexXref.java b/src/org/opensolaris/opengrok/analysis/JFlexXref.java index 48a2fd91bae..09180a24996 100644 --- a/src/org/opensolaris/opengrok/analysis/JFlexXref.java +++ b/src/org/opensolaris/opengrok/analysis/JFlexXref.java @@ -203,7 +203,7 @@ public void setFoldingEnabled(boolean foldingEnabled) { public void symbolMatched(SymbolMatchedEvent evt) { try { JFlexXrefUtils.writeSymbol(out, defs, urlPrefix, project, - evt.getStr(), null, matcher.getLineNumber(), false, false); + evt.getStr(), evt.getNormalizedStr(), null, matcher.getLineNumber(), false, false); } catch (IOException ex) { throw new RuntimeException(ex); } @@ -292,7 +292,7 @@ public void linkageMatched(LinkageMatchedEvent evt) { break; case LABELDEF: // Only PowerShell seems to be using this. - JFlexXrefUtils.writeSameFileLinkSymbol(out, str); + JFlexXrefUtils.writeSameFileLinkSymbol(out, str, str); break; case FILELIKE: out.write(" keywords, + String urlPrefix, Project project, String symbol, String id, Set keywords, int line, boolean caseSensitive, boolean isKeyword) throws IOException { String[] strs = new String[1]; @@ -255,7 +256,7 @@ public static boolean writeSymbol(Writer out, Definitions defs, return false; } - if (defs != null && defs.hasDefinitionAt(symbol, line, strs)) { + if (defs != null && defs.hasDefinitionAt(id, line, strs)) { // This is the definition of the symbol. String type = strs[0]; String style_class = "d"; @@ -281,7 +282,7 @@ public static boolean writeSymbol(Writer out, Definitions defs, out.append(""); } @@ -289,7 +290,7 @@ public static boolean writeSymbol(Writer out, Definitions defs, out.append(""); Util.htmlize(symbol, out); out.append(""); - } else if (defs != null && defs.occurrences(symbol) == 1) { - writeSameFileLinkSymbol(out, symbol); + } else if (defs != null && defs.occurrences(id) == 1) { + writeSameFileLinkSymbol(out, symbol, id); } else { // This is a symbol that is not defined in this file, or a symbol // that is defined more than once in this file. In either case, we @@ -308,7 +309,7 @@ public static boolean writeSymbol(Writer out, Definitions defs, out.append(""); diff --git a/src/org/opensolaris/opengrok/analysis/SymbolMatchedEvent.java b/src/org/opensolaris/opengrok/analysis/SymbolMatchedEvent.java index c8e962238b8..e4c5ee80be3 100644 --- a/src/org/opensolaris/opengrok/analysis/SymbolMatchedEvent.java +++ b/src/org/opensolaris/opengrok/analysis/SymbolMatchedEvent.java @@ -36,6 +36,7 @@ public class SymbolMatchedEvent { private final Object source; private final String str; + private final String normalizedStr; private final int start; private final int end; @@ -43,12 +44,14 @@ public class SymbolMatchedEvent { * Initializes an immutable instance of {@link SymbolMatchedEvent}. * @param source the event source * @param str the symbol string + * @param normalizedStr the symbol string, normalized according to language-specific conventions * @param start the symbol start position * @param end the symbol end position */ - public SymbolMatchedEvent(Object source, String str, int start, int end) { + public SymbolMatchedEvent(Object source, String str, String normalizedStr, int start, int end) { this.source = source; this.str = str; + this.normalizedStr = normalizedStr; this.start = start; this.end = end; } @@ -69,6 +72,14 @@ public String getStr() { return str; } + /** + * Gets the normalized symbol string. + * @return the initial value + */ + public String getNormalizedStr() { + return normalizedStr; + } + /** * Gets the symbol start position. * @return the initial value diff --git a/src/org/opensolaris/opengrok/analysis/fortran/FortranAnalyzer.java b/src/org/opensolaris/opengrok/analysis/fortran/FortranAnalyzer.java index f5097086b39..03f16a2233d 100644 --- a/src/org/opensolaris/opengrok/analysis/fortran/FortranAnalyzer.java +++ b/src/org/opensolaris/opengrok/analysis/fortran/FortranAnalyzer.java @@ -24,6 +24,7 @@ package org.opensolaris.opengrok.analysis.fortran; import java.io.Reader; +import org.opensolaris.opengrok.analysis.Ctags; import org.opensolaris.opengrok.analysis.FileAnalyzer; import org.opensolaris.opengrok.analysis.JFlexTokenizer; import org.opensolaris.opengrok.analysis.JFlexXref; @@ -36,6 +37,10 @@ */ public class FortranAnalyzer extends AbstractSourceCodeAnalyzer { + public static String normalizeIdentifier(String id) { + return id.toLowerCase() + "_"; + } + FortranAnalyzer(FortranAnalyzerFactory factory) { super(factory, new JFlexTokenizer(new FortranSymbolTokenizer( FileAnalyzer.dummyReader))); diff --git a/src/org/opensolaris/opengrok/analysis/fortran/FortranSymbolTokenizer.lex b/src/org/opensolaris/opengrok/analysis/fortran/FortranSymbolTokenizer.lex index b157eaa9a45..d396abfcdae 100644 --- a/src/org/opensolaris/opengrok/analysis/fortran/FortranSymbolTokenizer.lex +++ b/src/org/opensolaris/opengrok/analysis/fortran/FortranSymbolTokenizer.lex @@ -38,6 +38,11 @@ import org.opensolaris.opengrok.analysis.JFlexSymbolMatcher; %include CommonLexer.lexh %char +%{ + @Override + public String normalizeIdentifier(String id) { return FortranAnalyzer.normalizeIdentifier(id); } +%} + // (OK to exclude LCOMMENT state used in FortranXref.) %state STRING SCOMMENT QSTRING diff --git a/src/org/opensolaris/opengrok/analysis/fortran/FortranXref.lex b/src/org/opensolaris/opengrok/analysis/fortran/FortranXref.lex index 5041555b2b3..4b6bf30d453 100644 --- a/src/org/opensolaris/opengrok/analysis/fortran/FortranXref.lex +++ b/src/org/opensolaris/opengrok/analysis/fortran/FortranXref.lex @@ -60,6 +60,9 @@ import org.opensolaris.opengrok.web.HtmlConsts; break; } } + + @Override + public String normalizeIdentifier(String id) { return FortranAnalyzer.normalizeIdentifier(id); } %} File = [a-zA-Z]{FNameChar}* ".inc" diff --git a/src/org/opensolaris/opengrok/search/context/Context.java b/src/org/opensolaris/opengrok/search/context/Context.java index a584921827d..541768fb92f 100644 --- a/src/org/opensolaris/opengrok/search/context/Context.java +++ b/src/org/opensolaris/opengrok/search/context/Context.java @@ -37,6 +37,7 @@ import java.util.List; import java.util.Map; import java.util.TreeMap; +import java.util.function.Function; import java.util.logging.Level; import java.util.logging.Logger; @@ -44,8 +45,12 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; +import org.opensolaris.opengrok.analysis.AnalyzerGuru; import org.opensolaris.opengrok.analysis.Definitions; import org.opensolaris.opengrok.analysis.FileAnalyzer; +import org.opensolaris.opengrok.analysis.FileAnalyzerFactory; +import org.opensolaris.opengrok.analysis.fortran.FortranAnalyzer; +import org.opensolaris.opengrok.analysis.fortran.FortranAnalyzerFactory; import org.opensolaris.opengrok.analysis.Scopes; import org.opensolaris.opengrok.analysis.Scopes.Scope; import org.opensolaris.opengrok.analysis.plain.PlainAnalyzerFactory; @@ -416,6 +421,10 @@ public boolean getContext(Reader in, Writer out, String urlPrefix, String token; int matchState; int matchedLines = 0; + FileAnalyzerFactory factory = AnalyzerGuru.find(path); + if (factory instanceof FortranAnalyzerFactory) { + tokens.setNormalizeIdentifier(FortranAnalyzer::normalizeIdentifier); + } while ((token = tokens.yylex()) != null && (!lim || matchedLines < limit_max_lines)) { for (int i = 0; i < m.length; i++) { @@ -455,6 +464,7 @@ public boolean getContext(Reader in, Writer out, String urlPrefix, } } } + tokens.resetNormalizeIdentifier(); return anything; } } diff --git a/src/org/opensolaris/opengrok/search/context/PlainLineTokenizer.lex b/src/org/opensolaris/opengrok/search/context/PlainLineTokenizer.lex index 6bd11c7c533..e85a70e1c3a 100644 --- a/src/org/opensolaris/opengrok/search/context/PlainLineTokenizer.lex +++ b/src/org/opensolaris/opengrok/search/context/PlainLineTokenizer.lex @@ -32,6 +32,7 @@ import java.io.Reader; import java.io.Writer; import java.util.List; import java.util.TreeMap; +import java.util.function.Function; import org.opensolaris.opengrok.search.Hit; import org.opensolaris.opengrok.web.Util; import org.opensolaris.opengrok.analysis.Scopes; @@ -72,6 +73,14 @@ import org.opensolaris.opengrok.analysis.Scopes.Scope; boolean alt; Scopes scopes = null; + Function normalizeIdentifier = str -> str; + public void setNormalizeIdentifier(Function normalizeIdentifier) { + this.normalizeIdentifier = normalizeIdentifier; + } + public void resetNormalizeIdentifier() { + this.normalizeIdentifier = str -> str; + } + /** * Set the writer that should receive all output * @param out The new writer to write to @@ -400,7 +409,13 @@ Printable = [\@\$\%\^\&\-+=\?\.\:] %% -{Identifier}|{Number}|{Printable} { +{Identifier} { + String text = yytext(); + markedContents.append(text); + return normalizeIdentifier.apply(text); +} + +{Number}|{Printable} { String text = yytext(); markedContents.append(text); return text; diff --git a/test/org/opensolaris/opengrok/analysis/JFlexTokenizerTest.java b/test/org/opensolaris/opengrok/analysis/JFlexTokenizerTest.java index a14928c93e0..2fce1fc5da3 100644 --- a/test/org/opensolaris/opengrok/analysis/JFlexTokenizerTest.java +++ b/test/org/opensolaris/opengrok/analysis/JFlexTokenizerTest.java @@ -76,7 +76,8 @@ public void testOffsetAttribute() throws Exception { // create a text fragment that it understands testOffsetAttribute(FortranSymbolTokenizer.class, "1 token1 = token2 + token3", - new String[]{"token1", "token2", "token3"}); + new String[]{"token1", "token2", "token3"}, + true); } /** @@ -98,6 +99,11 @@ private void testOffsetAttribute(Class klass) private void testOffsetAttribute(Class klass, String inputText, String[] expectedTokens) throws Exception { + testOffsetAttribute(klass, inputText, expectedTokens, false); + } + private void testOffsetAttribute(Class klass, + String inputText, String[] expectedTokens, Boolean matchPrefix) + throws Exception { JFlexSymbolMatcher matcher = klass.getConstructor(Reader.class). newInstance(new StringReader(inputText)); JFlexTokenizer tokenizer = new JFlexTokenizer(matcher); @@ -109,7 +115,11 @@ private void testOffsetAttribute(Class klass, while (tokenizer.incrementToken()) { assertTrue("too many tokens", count < expectedTokens.length); String expected = expectedTokens[count]; - assertEquals("term", expected, term.toString()); + if (matchPrefix) { + assertEquals("term", term.toString().indexOf(expected), 0); + } else { + assertEquals("term", expected, term.toString()); + } assertEquals("start", inputText.indexOf(expected), offset.startOffset()); assertEquals("end", diff --git a/test/org/opensolaris/opengrok/analysis/fortran/FortranXrefTest.java b/test/org/opensolaris/opengrok/analysis/fortran/FortranXrefTest.java index 52d1a2b9657..3dcb5eb3a08 100644 --- a/test/org/opensolaris/opengrok/analysis/fortran/FortranXrefTest.java +++ b/test/org/opensolaris/opengrok/analysis/fortran/FortranXrefTest.java @@ -116,6 +116,7 @@ private Definitions getTagsDefinitions() throws IOException { res, "UTF-8")); CtagsReader rdr = new CtagsReader(); + rdr.setNormalizeIdentifier(FortranAnalyzer::normalizeIdentifier); String line; while ((line = in.readLine()) != null) { rdr.readLine(line); diff --git a/test/org/opensolaris/opengrok/analysis/fortran/sample_xref.html b/test/org/opensolaris/opengrok/analysis/fortran/sample_xref.html index 099c956a329..dcbb9784e49 100644 --- a/test/org/opensolaris/opengrok/analysis/fortran/sample_xref.html +++ b/test/org/opensolaris/opengrok/analysis/fortran/sample_xref.html @@ -4,7 +4,7 @@ sampleFile - OpenGrok cross reference for /sampleFile 1* Copyright (c) 2013 Samuel Halliday +function get_sym_list(){return [["Subroutine","xsr",[["dgesv_",173]]]];} /* ]]> */1* Copyright (c) 2013 Samuel Halliday 2* Copyright (c) 1992-2011 The University of Tennessee and The University 3* of Tennessee Research Foundation. All rights 4* reserved. @@ -176,7 +176,7 @@ 170*> \ingroup doubleGEsolve 171* 172* ===================================================================== -173 SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +173 SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) 174* 175* -- LAPACK driver routine (version 3.4.0) -- 176* -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -184,49 +184,49 @@ 178* November 2011 179* 180* .. Scalar Arguments .. -181 INTEGER INFO, LDA, LDB, N, NRHS +181 INTEGER INFO, LDA, LDB, N, NRHS 182* .. 183* .. Array Arguments .. -184 INTEGER IPIV( * ) -185 DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +184 INTEGER IPIV( * ) +185 DOUBLE PRECISION A( LDA, * ), B( LDB, * ) 186* .. 187* 188* ===================================================================== 189* 190* .. External Subroutines .. -191 EXTERNAL DGETRF, DGETRS, XERBLA +191 EXTERNAL DGETRF, DGETRS, XERBLA 192* .. 193* .. Intrinsic Functions .. -194 INTRINSIC MAX +194 INTRINSIC MAX 195* .. 196* .. Executable Statements .. 197* 198* Test the input parameters. 199* -200 INFO = 0 + 0xFFFF - 0XFF - 0xFF00 +200 INFO = 0 + 0xFFFF - 0XFF - 0xFF00 201 IF( N.LT.0 ) THEN -202 INFO = -1 -203 ELSE IF( NRHS.LT.0 ) THEN -204 INFO = -2 -205 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN -206 INFO = -4 -207 ELSE IF( LDB.LT.MAX( 1, N ) ) THEN -208 INFO = -7 +202 INFO = -1 +203 ELSE IF( NRHS.LT.0 ) THEN +204 INFO = -2 +205 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN +206 INFO = -4 +207 ELSE IF( LDB.LT.MAX( 1, N ) ) THEN +208 INFO = -7 209 END IF -210 IF( INFO.NE.0 ) THEN -211 CALL XERBLA( 'DGESV ', -INFO ) +210 IF( INFO.NE.0 ) THEN +211 CALL XERBLA( 'DGESV ', -INFO ) 212 RETURN 213 END IF 214* 215* Compute the LU factorization of A. 216* -217 CALL DGETRF( N, N, A, LDA, IPIV, INFO ) -218 IF( INFO.EQ.0 ) THEN +217 CALL DGETRF( N, N, A, LDA, IPIV, INFO ) +218 IF( INFO.EQ.0 ) THEN 219* 220* Solve the system A*X = B, overwriting B with X. 221* -222 CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB, -223 $ INFO ) +222 CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB, +223 $ INFO ) 224 END IF 225 RETURN 226* diff --git a/test/org/opensolaris/opengrok/analysis/fortran/samplesymbols.txt b/test/org/opensolaris/opengrok/analysis/fortran/samplesymbols.txt index a41cfb276bb..8b71b9a8112 100644 --- a/test/org/opensolaris/opengrok/analysis/fortran/samplesymbols.txt +++ b/test/org/opensolaris/opengrok/analysis/fortran/samplesymbols.txt @@ -1,54 +1,54 @@ -DGESV # 173: SUBROUTINE DGESV -N -NRHS -A -LDA -IPIV -B -LDB -INFO -INFO -LDA -LDB -N -NRHS -IPIV -A -LDA -B -LDB -DGETRF # 191: EXTERNAL DGETRF -DGETRS -XERBLA -INTRINSIC -INFO -N -INFO -NRHS -INFO -LDA -N -INFO -LDB -N -INFO -INFO -XERBLA -INFO -DGETRF # 217: CALL DGETRF -N -N -A -LDA -IPIV -INFO -INFO -DGETRS -N -NRHS -A -LDA -IPIV -B -LDB -INFO +dgesv_ # 173: SUBROUTINE DGESV +n_ +nrhs_ +a_ +lda_ +ipiv_ +b_ +ldb_ +info_ +info_ +lda_ +ldb_ +n_ +nrhs_ +ipiv_ +a_ +lda_ +b_ +ldb_ +dgetrf_ # 191: EXTERNAL DGETRF +dgetrs_ +xerbla_ +intrinsic_ +info_ +n_ +info_ +nrhs_ +info_ +lda_ +n_ +info_ +ldb_ +n_ +info_ +info_ +xerbla_ +info_ +dgetrf_ # 217: CALL DGETRF +n_ +n_ +a_ +lda_ +ipiv_ +info_ +info_ +dgetrs_ +n_ +nrhs_ +a_ +lda_ +ipiv_ +b_ +ldb_ +info_ diff --git a/test/org/opensolaris/opengrok/util/CustomAssertions.java b/test/org/opensolaris/opengrok/util/CustomAssertions.java index cc7ac2a597e..346f82ccd92 100644 --- a/test/org/opensolaris/opengrok/util/CustomAssertions.java +++ b/test/org/opensolaris/opengrok/util/CustomAssertions.java @@ -118,10 +118,10 @@ public static void assertSymbolStream( byte[] inputCopy = copyStream(iss); String input = new String(inputCopy, StandardCharsets.UTF_8); - JFlexTokenizer tokenizer = new JFlexTokenizer( - klass.getConstructor(Reader.class).newInstance( + JFlexSymbolMatcher matcher = klass.getConstructor(Reader.class).newInstance( new InputStreamReader(new ByteArrayInputStream(inputCopy), - StandardCharsets.UTF_8))); + StandardCharsets.UTF_8)); + JFlexTokenizer tokenizer = new JFlexTokenizer(matcher); CharTermAttribute term = tokenizer.addAttribute( CharTermAttribute.class); @@ -133,8 +133,8 @@ public static void assertSymbolStream( String termValue = term.toString(); tokens.add(termValue); - String cutValue = input.substring(offs.startOffset(), - offs.endOffset()); + String cutValue = matcher.normalizeIdentifier( + input.substring(offs.startOffset(), offs.endOffset())); assertEquals("cut term" + (1 + count), cutValue, termValue); ++count; }