From 748b6ae14d91c09cd987b4512e0dddd189ac8a7b Mon Sep 17 00:00:00 2001 From: Chris Fraire Date: Wed, 20 Mar 2019 18:22:00 -0500 Subject: [PATCH 1/6] Just cleanup thanks to IDEA --- .../indexer/search/context/Context.java | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/Context.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/Context.java index 12877e4c8fe..e4484ebda6a 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/Context.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/Context.java @@ -20,7 +20,7 @@ /* * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. * Portions Copyright 2011 Jens Elkner. - * Portions Copyright (c) 2018, 2020, Chris Fraire . + * Portions Copyright (c) 2018-2020, Chris Fraire . */ package org.opengrok.indexer.search.context; @@ -70,8 +70,7 @@ public class Context { * whose values tell if the field is case insensitive (true for * insensitivity, false for sensitivity). */ - private static final Map TOKEN_FIELDS = - new HashMap(); + private static final Map TOKEN_FIELDS = new HashMap<>(); static { TOKEN_FIELDS.put(QueryBuilder.FULL, Boolean.TRUE); TOKEN_FIELDS.put(QueryBuilder.REFS, Boolean.FALSE); @@ -188,7 +187,7 @@ public boolean getContext2(RuntimeEnvironment env, IndexSearcher searcher, ContextArgs args = new ContextArgs(env.getContextSurround(), env.getContextLimit()); - /** + /* * Lucene adds to the following value in FieldHighlighter, so avoid * integer overflow by not using Integer.MAX_VALUE -- Short is good * enough. @@ -204,7 +203,7 @@ public boolean getContext2(RuntimeEnvironment env, IndexSearcher searcher, OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter(env, searcher, anz); - uhi.setBreakIterator(() -> new StrictLineBreakIterator()); + uhi.setBreakIterator(StrictLineBreakIterator::new); uhi.setFormatter(formatter); uhi.setTabSize(tabSize); @@ -251,11 +250,15 @@ private String buildQueryAsURI(Map subqueries) { private boolean alt = true; - public boolean getContext(Reader in, Writer out, String urlPrefix, + /** + * This method exists only for testing. + */ + boolean getContext(Reader in, Writer out, String urlPrefix, String morePrefix, String path, Definitions tags, boolean limit, boolean isDefSearch, List hits) { return getContext(in, out, urlPrefix, morePrefix, path, tags, limit, isDefSearch, hits, null); } + /** * ???. * Closes the given in reader on return. @@ -285,11 +288,11 @@ public boolean getContext(Reader in, Writer out, String urlPrefix, (urlPrefix == null) ? "" : Util.URIEncodePath(urlPrefix); String pathE = Util.URIEncodePath(path); if (tags != null) { - matchingTags = new TreeMap(); + matchingTags = new TreeMap<>(); try { for (Definitions.Tag tag : tags.getTags()) { - for (int i = 0; i < m.length; i++) { - if (m[i].match(tag.symbol) == LineMatcher.MATCHED) { + for (LineMatcher lineMatcher : m) { + if (lineMatcher.match(tag.symbol) == LineMatcher.MATCHED) { String scope = null; String scopeUrl = null; if (scopes != null) { @@ -359,7 +362,7 @@ public boolean getContext(Reader in, Writer out, String urlPrefix, } } } - /** + /* * Just to get the matching tag send a null in */ if (in == null) { @@ -419,8 +422,8 @@ public boolean getContext(Reader in, Writer out, String urlPrefix, int matchedLines = 0; while ((token = tokens.yylex()) != null && (!lim || matchedLines < limit_max_lines)) { - for (int i = 0; i < m.length; i++) { - matchState = m[i].match(token); + for (LineMatcher lineMatcher : m) { + matchState = lineMatcher.match(token); if (matchState == LineMatcher.MATCHED) { if (!isDefSearch) { tokens.printContext(); From d87876cd5e117454fe226d66bab0b182ea9f7fdb Mon Sep 17 00:00:00 2001 From: Chris Fraire Date: Wed, 20 Mar 2019 18:26:14 -0500 Subject: [PATCH 2/6] Remove RuntimeEnvironment parameter --- .../java/org/opengrok/indexer/search/Results.java | 13 ++++--------- .../opengrok/indexer/search/context/Context.java | 6 +++--- opengrok-web/src/main/webapp/more.jsp | 6 +++--- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/Results.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/Results.java index 42090969098..de6a38672f8 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/Results.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/Results.java @@ -45,7 +45,6 @@ import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.Document; -import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; @@ -82,12 +81,10 @@ private Results() { * @param startIdx the index of the first hit to check * @param stopIdx the index of the last hit to check * @return a (directory, hitDocument) hashmap - * @throws CorruptIndexException - * @throws IOException */ private static Map> createMap( IndexSearcher searcher, ScoreDoc[] hits, int startIdx, long stopIdx) - throws CorruptIndexException, IOException { + throws IOException { LinkedHashMap> dirHash = new LinkedHashMap<>(); @@ -201,7 +198,7 @@ public static void prettyPrint(Writer out, SearchHelper sh, int start, } int tabSize = sh.getTabSize(p); - PrintPlainFinalArgs fargs = new PrintPlainFinalArgs(out, sh, env, + PrintPlainFinalArgs fargs = new PrintPlainFinalArgs(out, sh, xrefPrefix, tabSize, morePrefix); out.write(""); @@ -272,7 +269,7 @@ private static void printPlain(PrintPlainFinalArgs fargs, Document doc, fargs.shelp.sourceContext.toggleAlt(); - boolean didPresentNew = fargs.shelp.sourceContext.getContext2(fargs.env, + boolean didPresentNew = fargs.shelp.sourceContext.getContext2( fargs.shelp.searcher, docId, fargs.out, fargs.xrefPrefix, fargs.morePrefix, true, fargs.tabSize); @@ -321,17 +318,15 @@ private static String htmlize(String raw) { private static class PrintPlainFinalArgs { final Writer out; final SearchHelper shelp; - final RuntimeEnvironment env; final String xrefPrefix; final String morePrefix; final int tabSize; PrintPlainFinalArgs(Writer out, SearchHelper shelp, - RuntimeEnvironment env, String xrefPrefix, int tabSize, + String xrefPrefix, int tabSize, String morePrefix) { this.out = out; this.shelp = shelp; - this.env = env; this.xrefPrefix = xrefPrefix; this.morePrefix = morePrefix; this.tabSize = tabSize; diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/Context.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/Context.java index e4484ebda6a..fe74f276477 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/Context.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/Context.java @@ -60,6 +60,7 @@ public class Context { private static final Logger LOGGER = LoggerFactory.getLogger(Context.class); + private final RuntimeEnvironment env; private final Query query; private final QueryBuilder qbuilder; private final LineMatcher[] m; @@ -89,6 +90,7 @@ public Context(Query query, QueryBuilder qbuilder) { throw new IllegalArgumentException("qbuilder is null"); } + this.env = RuntimeEnvironment.getInstance(); this.query = query; this.qbuilder = qbuilder; QueryMatchers qm = new QueryMatchers(); @@ -114,7 +116,6 @@ public boolean isEmpty() { /** * Look for context for this instance's initialized query in a search result * {@link Document}, and output according to the parameters. - * @param env required environment * @param searcher required search that produced the document * @param docId document ID for producing context * @param dest required target to write @@ -131,7 +132,7 @@ public boolean isEmpty() { * re-indexing * @return Did it get any matching context? */ - public boolean getContext2(RuntimeEnvironment env, IndexSearcher searcher, + public boolean getContext2(IndexSearcher searcher, int docId, Appendable dest, String urlPrefix, String morePrefix, boolean limit, int tabSize) { @@ -372,7 +373,6 @@ public boolean getContext(Reader in, Writer out, String urlPrefix, PlainLineTokenizer tokens = new PlainLineTokenizer(null); boolean truncated = false; boolean lim = limit; - RuntimeEnvironment env = RuntimeEnvironment.getInstance(); if (!env.isQuickContextScan()) { lim = false; } diff --git a/opengrok-web/src/main/webapp/more.jsp b/opengrok-web/src/main/webapp/more.jsp index e24a91d285a..3aafdadeea9 100644 --- a/opengrok-web/src/main/webapp/more.jsp +++ b/opengrok-web/src/main/webapp/more.jsp @@ -20,7 +20,7 @@ CDDL HEADER END Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. Portions Copyright 2011 Jens Elkner. -Portions Copyright (c) 2018, 2020, Chris Fraire . +Portions Copyright (c) 2018-2020, Chris Fraire . --%><%@page errorPage="error.jsp" import=" java.io.File, @@ -88,12 +88,12 @@ file="mast.jsp" String xrefPrefix = request.getContextPath() + Prefix.XREF_P; boolean didPresentNew = false; if (docId >= 0) { - didPresentNew = searchHelper.sourceContext.getContext2(env, + didPresentNew = searchHelper.sourceContext.getContext2( searchHelper.searcher, docId, out, xrefPrefix, null, false, tabSize); } if (!didPresentNew) { - /** + /* * Fall back to the old view, which re-analyzes text using * PlainLinetokenizer. E.g., when source code is updated (thus * affecting timestamps) but re-indexing is not yet complete. From 52f818f0734a6f981bfa88453ce60a6723983fa5 Mon Sep 17 00:00:00 2001 From: Chris Fraire Date: Wed, 20 Mar 2019 19:07:44 -0500 Subject: [PATCH 3/6] Hit must take a filename - path, filename, and directory are now immutable, so require initialization --- .../java/org/opengrok/indexer/search/Hit.java | 122 +++--------------- .../org/opengrok/indexer/search/HitTest.java | 50 ++----- 2 files changed, 31 insertions(+), 141 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/Hit.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/Hit.java index 2bd7592f2b6..d5bdd774b2b 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/Hit.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/Hit.java @@ -21,6 +21,7 @@ * Copyright (c) 2005, 2018, Oracle and/or its affiliates. All rights reserved. * portions copyright 2005 Trond Norbye. All rights reserved. * Use is subject to license terms. + * Portions Copyright (c) 2019, Chris Fraire . */ package org.opengrok.indexer.search; @@ -31,16 +32,16 @@ * * @author Trond Norbye */ -public class Hit implements Comparable { +public class Hit { /** * Holds value of property filename. */ - private String filename; + private final String filename; /** * Holds value of property directory. */ - private String directory; + private final String directory; /** * Holds value of property line. @@ -65,17 +66,19 @@ public class Hit implements Comparable { /** * path relative to source root. */ - private String path; + private final String path; /** - * Creates a new instance of Hit. + * Creates a new, possibly-defined instance. + * + * @param filename The name of the file this hit represents */ - public Hit() { - this(null, null, null, false, false); + public Hit(String filename) { + this(filename, null, null, false, false); } /** - * Creates a new instance of Hit. + * Creates a new, possibly-defined instance. * * @param filename The name of the file this hit represents * @param line The line containing the match @@ -88,10 +91,16 @@ public Hit(String filename, String line, String lineno, boolean binary, boolean File file = new File(filename); this.path = filename; this.filename = file.getName(); - this.directory = file.getParent(); - if (directory == null) { + final String parent = file.getParent(); + if (parent == null) { directory = ""; + } else { + directory = parent; } + } else { + this.path = ""; + this.filename = ""; + this.directory = ""; } this.line = line; this.lineno = lineno; @@ -99,104 +108,38 @@ public Hit(String filename, String line, String lineno, boolean binary, boolean this.alt = alt; } - /** - * Getter for property filename. - * - * @return Value of property filename. - */ public String getFilename() { return this.filename; } - /** - * Getter for property path. - * - * @return Value of property path. - */ public String getPath() { return this.path; } - /** - * Getter for property directory. - * - * @return Value of property directory - */ public String getDirectory() { return this.directory; } - /** - * Setter for property filename. - * - * @param filename New value of property filename. - */ - public void setFilename(String filename) { - this.filename = filename; - } - - /** - * Getter for property line. - * - * @return Value of property line. - */ public String getLine() { return this.line; } - /** - * Setter for property line. - * - * @param line New value of property line. - */ public void setLine(String line) { this.line = line; } - /** - * Getter for property line no. - * - * @return Value of property line no. - */ public String getLineno() { return this.lineno; } - /** - * Setter for property line no. - * - * @param lineno New value of property line no. - */ public void setLineno(String lineno) { this.lineno = lineno; } - /** - * Compare this object to another hit (in order to implement the comparable interface). - * - * @param o The object to compare this object with - * - * @return the result of a toString().compareTo() of the filename - */ - @Override - public int compareTo(Hit o) throws ClassCastException { - return filename.compareTo(o.filename); - } - - /** - * Getter for property binary. - * - * @return Value of property binary. - */ public boolean isBinary() { return this.binary; } - /** - * Setter for property binary. - * - * @param binary New value of property binary. - */ public void setBinary(boolean binary) { this.binary = binary; } @@ -206,19 +149,11 @@ public void setBinary(boolean binary) { */ private String tag; - /** - * Getter for property tag. - * @return Value of property tag. - */ public String getTag() { return this.tag; } - /** - * Setter for property tag. - * @param tag New value of property tag. - */ public void setTag(String tag) { this.tag = tag; @@ -231,23 +166,4 @@ public void setTag(String tag) { public boolean getAlt() { return alt; } - - /** - * Check if two objects are equal. Only consider the {@code filename} field - * to match the return value of the {@link #compareTo(Hit)} method. - * @param o the object to compare with - * @return true if the filenames are equal - */ - @Override - public boolean equals(Object o) { - if (o instanceof Hit) { - return compareTo((Hit) o) == 0; - } - return false; - } - - @Override - public int hashCode() { - return filename.hashCode(); - } } diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/search/HitTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/search/HitTest.java index 3bbf99c6fa7..0c99f6e3b1c 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/search/HitTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/search/HitTest.java @@ -19,15 +19,18 @@ /* * Copyright (c) 2008, 2018 Oracle and/or its affiliates. All rights reserved. + * Portions Copyright (c) 2019, Chris Fraire . */ package org.opengrok.indexer.search; -import org.junit.Test; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import org.junit.Test; import java.io.File; -import static org.junit.Assert.*; - /** * Do basic sanity testing of the Hit class * @@ -37,10 +40,8 @@ public class HitTest { @Test public void testFilename() { - Hit instance = new Hit(); - assertNull(instance.getFilename()); + Hit instance = new Hit("a/b/foobar"); String expResult = "foobar"; - instance.setFilename(expResult); assertEquals(expResult, instance.getFilename()); } @@ -53,7 +54,7 @@ public void testPath() { @Test public void testLine() { - Hit instance = new Hit(); + Hit instance = new Hit("a/b/c"); assertNull(instance.getLine()); String expResult = "This is a line of text"; instance.setLine(expResult); @@ -62,25 +63,16 @@ public void testLine() { @Test public void testLineno() { - Hit instance = new Hit(); + Hit instance = new Hit("a/b"); assertNull(instance.getLineno()); String expResult = "12"; instance.setLineno(expResult); assertEquals(expResult, instance.getLineno()); } - @Test - public void testCompareTo() { - Hit o1 = new Hit("/foo", null, null, false, false); - Hit o2 = new Hit("/foo", "hi", "there", false, false); - assertEquals(o2.compareTo(o1), o1.compareTo(o2)); - o1.setFilename("bar"); - assertFalse(o2.compareTo(o1) == o1.compareTo(o2)); - } - @Test public void testBinary() { - Hit instance = new Hit(); + Hit instance = new Hit("abc"); assertFalse(instance.isBinary()); instance.setBinary(true); assertTrue(instance.isBinary()); @@ -88,7 +80,7 @@ public void testBinary() { @Test public void testTag() { - Hit instance = new Hit(); + Hit instance = new Hit("def"); assertNull(instance.getTag()); String expResult = "foobar"; instance.setTag(expResult); @@ -98,27 +90,9 @@ public void testTag() { @Test public void testAlt() { - Hit instance = new Hit(); + Hit instance = new Hit("ghi/d"); assertFalse(instance.getAlt()); Hit o2 = new Hit(null, null, null, false, true); assertTrue(o2.getAlt()); } - - @Test - public void testEquals() { - Hit o1 = new Hit("/foo", null, null, false, false); - Hit o2 = new Hit("/foo", "hi", "there", false, false); - assertEquals(o2.equals(o1), o1.equals(o2)); - o1.setFilename("bar"); - assertFalse(o2.equals(o1)); - assertFalse(o1.equals(o2)); - assertFalse(o1.equals(new Object())); - } - - @Test - public void testHashCode() { - String filename = "bar"; - Hit instance = new Hit(filename, null, null, false, false); - assertEquals(filename.hashCode(), instance.hashCode()); - } } From 05801be006d266964936a9bd59831642a3537c6f Mon Sep 17 00:00:00 2001 From: Chris Fraire Date: Sun, 24 Mar 2019 18:12:10 -0500 Subject: [PATCH 4/6] Decorate SearchHist as NON_NULL --- .../org/opengrok/web/api/v1/controller/SearchController.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java b/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java index 386b43d68c4..fade00b1587 100644 --- a/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java +++ b/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java @@ -19,10 +19,11 @@ /* * Copyright (c) 2018 Oracle and/or its affiliates. All rights reserved. - * Portions Copyright (c) 2020, Chris Fraire . + * Portions Copyright (c) 2019-2020, Chris Fraire . */ package org.opengrok.web.api.v1.controller; +import com.fasterxml.jackson.annotation.JsonInclude; import org.apache.lucene.search.Query; import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.search.Hit; @@ -214,6 +215,7 @@ public int getEndDocument() { } } + @JsonInclude(JsonInclude.Include.NON_NULL) private static class SearchHit { private final String line; From 801a11b1c2c031bf3a031d3d7bc79d4f5f1c6108 Mon Sep 17 00:00:00 2001 From: Chris Fraire Date: Sun, 24 Mar 2019 19:01:58 -0500 Subject: [PATCH 5/6] Uncomment but Ignore SearchEngineTest.testSearch() --- .../indexer/search/SearchEngineTest.java | 34 ++++++++----------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java index e8b78dd5660..43c24348880 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java @@ -24,22 +24,27 @@ package org.opengrok.indexer.search; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + import java.io.File; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; import java.util.TreeSet; -import org.junit.After; import org.junit.AfterClass; -import org.junit.Before; import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; import org.opengrok.indexer.configuration.RuntimeEnvironment; import org.opengrok.indexer.history.HistoryGuru; +import org.opengrok.indexer.history.RepositoryFactory; import org.opengrok.indexer.index.Indexer; import org.opengrok.indexer.util.TestRepository; -import static org.junit.Assert.*; -import org.opengrok.indexer.history.RepositoryFactory; - /** * Do basic testing of the SearchEngine * @@ -47,8 +52,8 @@ */ public class SearchEngineTest { - static TestRepository repository; - static File configFile; + private static TestRepository repository; + private static File configFile; @BeforeClass public static void setUpClass() throws Exception { @@ -75,19 +80,11 @@ public static void setUpClass() throws Exception { } @AfterClass - public static void tearDownClass() throws Exception { + public static void tearDownClass() { repository.destroy(); configFile.delete(); } - @Before - public void setUp() { - } - - @After - public void tearDown() { - } - @Test public void testIsValidQuery() { SearchEngine instance = new SearchEngine(); @@ -142,7 +139,7 @@ public void testSymbol() { } @Test - public void testGetQuery() throws Exception { + public void testGetQuery() { SearchEngine instance = new SearchEngine(); instance.setHistory("Once upon a time"); instance.setFile("Makefile"); @@ -154,8 +151,8 @@ public void testGetQuery() throws Exception { instance.getQuery()); } - /* see https://github.com/oracle/opengrok/issues/2030 @Test + @Ignore("See https://github.com/oracle/opengrok/issues/2030") public void testSearch() { List hits = new ArrayList<>(); @@ -282,5 +279,4 @@ public void testSearch() { assertEquals(1, instance.search()); instance.destroy(); } - */ } From bd1e3aee8620efe8b449cbb608bbf65042103512 Mon Sep 17 00:00:00 2001 From: Chris Fraire Date: Sat, 23 Mar 2019 21:58:39 -0500 Subject: [PATCH 6/6] Add HitFormatter and {Search}Hit substring properties Addresses the source-code related part of #2612 since `OGKUnifiedHighlighter` allows straight- forward, alternate formatters. `HistoryContext` is still a custom highlighting, so it will still return HTML-like content. --- .../java/org/opengrok/indexer/search/Hit.java | 38 ++++++ .../opengrok/indexer/search/HitFormatter.java | 125 ++++++++++++++++++ .../opengrok/indexer/search/SearchEngine.java | 114 +++++++++------- .../indexer/search/SearchFormatterBase.java | 119 +++++++++++++++++ .../indexer/search/context/Context.java | 70 +++++++++- .../search/context/ContextFormatter.java | 87 ++---------- .../search/context/OGKUnifiedHighlighter.java | 102 ++++++++++---- .../SearchAndContextFormatterTest.java | 2 +- .../SearchAndContextFormatterTest2.java | 2 +- .../api/v1/controller/SearchController.java | 19 ++- 10 files changed, 521 insertions(+), 157 deletions(-) create mode 100644 opengrok-indexer/src/main/java/org/opengrok/indexer/search/HitFormatter.java create mode 100644 opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchFormatterBase.java diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/Hit.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/Hit.java index d5bdd774b2b..64d1e5f4aa4 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/Hit.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/Hit.java @@ -68,6 +68,16 @@ public class Hit { */ private final String path; + /** + * A phrase match's left offset (inclusive) within the line. + */ + private Integer left; + + /** + * A phrase match's right offset (exclusive) within the line. + */ + private Integer right; + /** * Creates a new, possibly-defined instance. * @@ -166,4 +176,32 @@ public void setTag(String tag) { public boolean getAlt() { return alt; } + + /** + * Gets the left line offset (inclusive) of a phrase match. + */ + public Integer getLeft() { + return this.left; + } + + /** + * Sets the left line offset (inclusive) of a phrase match. + */ + public void setLeft(Integer left) { + this.left = left; + } + + /** + * Gets the right line offset (exclusive) of a phrase match. + */ + public Integer getRight() { + return this.right; + } + + /** + * Sets the right line offset (exclusive) of a phrase match. + */ + public void setRight(Integer right) { + this.right = right; + } } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/HitFormatter.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/HitFormatter.java new file mode 100644 index 00000000000..f3087a8c234 --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/HitFormatter.java @@ -0,0 +1,125 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018-2019, Chris Fraire . + */ + +package org.opengrok.indexer.search; + +import org.apache.lucene.search.uhighlight.Passage; +import org.apache.lucene.search.uhighlight.PassageFormatter; +import org.opengrok.indexer.analysis.Definitions; +import org.opengrok.indexer.search.context.ContextArgs; +import org.opengrok.indexer.search.context.LineHighlight; +import org.opengrok.indexer.search.context.PassageConverter; +import org.opengrok.indexer.search.context.PhraseHighlight; +import org.opengrok.indexer.util.SourceSplitter; +import org.opengrok.indexer.util.StringUtils; + +import java.util.ArrayList; +import java.util.List; +import java.util.SortedMap; +import java.util.regex.Matcher; + +/** + * Represents a subclass of {@link PassageFormatter} that uses + * {@link PassageConverter} to produce {@link Hit} instances. + */ +public class HitFormatter extends SearchFormatterBase { + + private String filename; + + /** + * Initializes a formatter for the specified arguments. + */ + public HitFormatter() { + super(new PassageConverter(new ContextArgs((short) 0, Short.MAX_VALUE))); + } + + /** + * Gets the source code file name, including optional path. + * @return the full path or {@code null} + */ + public String getFilename() { + return filename; + } + + /** + * Sets the source code file name. + * @param value the file name to use + */ + public void setFilename(String value) { + this.filename = value; + } + + /** + * Splits {@code originalText} using {@link SourceSplitter}, converts + * passages using {@link PassageConverter}, and formats for returning hits + * through the search API. + * @param passages a required instance + * @param originalText a required instance + * @return a defined list of {@link Hit} instances, which might be empty + */ + @Override + public Object format(Passage[] passages, String originalText) { + + updateOriginalText(originalText); + + SortedMap lines = cvt.convert(passages, splitter); + List res = new ArrayList<>(); + for (LineHighlight lhi : lines.values()) { + final int lineOffset = lhi.getLineno(); + + String line = splitter.getLine(lineOffset); + Matcher eolMatcher = StringUtils.STANDARD_EOL.matcher(line); + if (eolMatcher.find()) { + line = line.substring(0, eolMatcher.start()); + } + + for (int i = 0; i < lhi.countMarkups(); ++i) { + marks.clear(); + PhraseHighlight phi = lhi.getMarkup(i); + checkIfMark(line, phi); + + Hit hit = new Hit(filename); + // `binary' is false + hit.setLine(line); + hit.setLineno(String.valueOf(lineOffset + 1)); // to 1-offset + hit.setLeft(phi.getLineStart()); + hit.setRight(phi.getLineEnd()); + + if (defs != null) { + // N.b. use ctags 1-offset vs 0-offset. + List lineTags = defs.getTags(lineOffset + 1); + if (lineTags != null) { + Definitions.Tag pickedTag = findTagForMark(lineTags, marks); + if (pickedTag != null) { + hit.setTag(pickedTag.type); + } + } + } + + res.add(hit); + } + } + + return res; + } +} diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java index d3cdab7933c..279b8cb7937 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java @@ -19,7 +19,7 @@ /* * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. - * Portions Copyright (c) 2018, Chris Fraire . + * Portions Copyright (c) 2018-2019, Chris Fraire . */ package org.opengrok.indexer.search; @@ -31,7 +31,6 @@ import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.SortedSet; @@ -91,6 +90,9 @@ public class SearchEngine { */ public static final Version LUCENE_VERSION = Version.LATEST; public static final String LUCENE_VERSION_HELP = LUCENE_VERSION.major + "_" + LUCENE_VERSION.minor + "_" + LUCENE_VERSION.bugfix; + + private final RuntimeEnvironment env; + /** * Holds value of property definition. */ @@ -129,20 +131,25 @@ public class SearchEngine { private final char[] content = new char[1024 * 8]; private String source; private String data; - int hitsPerPage = RuntimeEnvironment.getInstance().getHitsPerPage(); - int cachePages = RuntimeEnvironment.getInstance().getCachePages(); - int totalHits = 0; + private int hitsPerPage; + private int cachePages; + private int totalHits; private ScoreDoc[] hits; private TopScoreDocCollector collector; + private IndexReader reader; + private SettingsHelper settingsHelper; private IndexSearcher searcher; - boolean allCollected; + private boolean allCollected; private final ArrayList searcherList = new ArrayList<>(); /** * Creates a new instance of SearchEngine. */ public SearchEngine() { + env = RuntimeEnvironment.getInstance(); docs = new ArrayList<>(); + hitsPerPage = env.getHitsPerPage(); + cachePages = env.getCachePages(); } /** @@ -178,12 +185,12 @@ public boolean isValidQuery() { * @param paging whether to use paging (if yes, first X pages will load * faster) * @param root which db to search - * @throws IOException */ private void searchSingleDatabase(File root, boolean paging) throws IOException { - IndexReader ireader = DirectoryReader.open(FSDirectory.open(root.toPath())); - searcher = new IndexSearcher(ireader); - searchIndex(searcher, paging); + reader = DirectoryReader.open(FSDirectory.open(root.toPath())); + settingsHelper = new SettingsHelper(reader); + searcher = new IndexSearcher(reader); + searchReader(paging); } /** @@ -191,7 +198,6 @@ private void searchSingleDatabase(File root, boolean paging) throws IOException * @param paging whether to use paging (if yes, first X pages will load * faster) * @param root list of projects to search - * @throws IOException */ private void searchMultiDatabase(List root, boolean paging) throws IOException { SortedSet projects = new TreeSet<>(); @@ -202,13 +208,14 @@ private void searchMultiDatabase(List root, boolean paging) throws IOEx // We use MultiReader even for single project. This should // not matter given that MultiReader is just a cheap wrapper // around set of IndexReader objects. - MultiReader searchables = RuntimeEnvironment.getInstance(). - getMultiReader(projects, searcherList); + MultiReader searchables = env.getMultiReader(projects, searcherList); + reader = searchables; + settingsHelper = new SettingsHelper(reader); searcher = new IndexSearcher(searchables); - searchIndex(searcher, paging); + searchReader(paging); } - private void searchIndex(IndexSearcher searcher, boolean paging) throws IOException { + private void searchReader(boolean paging) throws IOException { collector = TopScoreDocCollector.create(hitsPerPage * cachePages, Short.MAX_VALUE); searcher.search(query, collector); totalHits = collector.getTotalHits(); @@ -276,7 +283,7 @@ public IndexSearcher getSearcher() { * @return The number of hits */ public int search(List projects) { - return search(projects, new File(RuntimeEnvironment.getInstance().getDataRootFile(), IndexDatabase.INDEX_DIR)); + return search(projects, new File(env.getDataRootFile(), IndexDatabase.INDEX_DIR)); } /** @@ -293,7 +300,6 @@ public int search(List projects) { * @return The number of hits */ public int search() { - RuntimeEnvironment env = RuntimeEnvironment.getInstance(); return search( env.hasProjects() ? env.getProjectList() : new ArrayList<>(), new File(env.getDataRootFile(), IndexDatabase.INDEX_DIR)); @@ -311,8 +317,8 @@ public int search() { * @return The number of hits */ private int search(List projects, File root) { - source = RuntimeEnvironment.getInstance().getSourceRootPath(); - data = RuntimeEnvironment.getInstance().getDataRootPath(); + source = env.getSourceRootPath(); + data = env.getDataRootPath(); docs.clear(); QueryBuilder newBuilder = createQueryBuilder(); @@ -422,61 +428,55 @@ public void results(int start, int end, List ret) { Level.WARNING, SEARCH_EXCEPTION_MSG, e); } hits = collector.topDocs().scoreDocs; - Document d = null; for (int i = start; i < hits.length; i++) { int docId = hits[i].doc; + Document d = null; try { d = searcher.doc(docId); } catch (Exception e) { - LOGGER.log( - Level.SEVERE, SEARCH_EXCEPTION_MSG, e); + LOGGER.log(Level.SEVERE, SEARCH_EXCEPTION_MSG, e); } docs.add(d); } allCollected = true; } - //TODO generation of ret(results) could be cashed and consumers of engine would just print them in whatever + //TODO generation of ret(results) could be cashed and consumers of engine would just print them in whatever form // form they need, this way we could get rid of docs // the only problem is that count of docs is usually smaller than number of results - for (int ii = start; ii < end; ++ii) { - boolean alt = (ii % 2 == 0); + for (int i = start; i < end; ++i) { + boolean alt = (i % 2 == 0); boolean hasContext = false; try { - Document doc = docs.get(ii); - String filename = doc.get(QueryBuilder.PATH); + int docId = hits[i].doc; + Document doc = docs.get(i); + if (doc == null) { + continue; + } + String filename = doc.get(QueryBuilder.PATH); AbstractAnalyzer.Genre genre = AbstractAnalyzer.Genre.get(doc.get(QueryBuilder.T)); - Definitions tags = null; - IndexableField tagsField = doc.getField(QueryBuilder.TAGS); - if (tagsField != null) { - tags = Definitions.deserialize(tagsField.binaryValue().bytes); - } - Scopes scopes = null; - IndexableField scopesField = doc.getField(QueryBuilder.SCOPES); - if (scopesField != null) { - scopes = Scopes.deserialize(scopesField.binaryValue().bytes); - } - int nhits = docs.size(); if (sourceContext != null) { sourceContext.toggleAlt(); + Project proj = Project.getProject(filename); try { if (AbstractAnalyzer.Genre.PLAIN == genre && (source != null)) { - // SRCROOT is read with UTF-8 as a default. - hasContext = sourceContext.getContext( - new InputStreamReader(new FileInputStream( - source + filename), StandardCharsets.UTF_8), - null, null, null, filename, tags, nhits > 100, - false, ret, scopes); + int tabSize = settingsHelper.getTabSize(proj); + List contextHits = sourceContext.getHits( + searcher, docId, tabSize, filename); + if (contextHits != null) { + hasContext = true; + ret.addAll(contextHits); + } } else if (AbstractAnalyzer.Genre.XREFABLE == genre && data != null && summarizer != null) { int l; - /** + /* * For backward compatibility, read the * OpenGrok-produced document using the system * default charset. */ - try (Reader r = RuntimeEnvironment.getInstance().isCompressXref() + try (Reader r = env.isCompressXref() ? new HTMLStripCharFilter(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream( TandemPath.join(data + Prefix.XREF_P + filename, ".gz")))))) : new HTMLStripCharFilter(new BufferedReader(new FileReader(data + Prefix.XREF_P + filename)))) { @@ -499,11 +499,11 @@ public void results(int start, int end, List ret) { } } else { LOGGER.log(Level.WARNING, "Unknown genre: {0} for {1}", new Object[]{genre, filename}); - hasContext |= sourceContext.getContext(null, null, null, null, filename, tags, false, false, ret, scopes); + hasContext = getFallbackContext(ret, doc, filename); } } catch (FileNotFoundException exp) { LOGGER.log(Level.WARNING, "Couldn''t read summary from {0} ({1})", new Object[]{filename, exp.getMessage()}); - hasContext |= sourceContext.getContext(null, null, null, null, filename, tags, false, false, ret, scopes); + hasContext = getFallbackContext(ret, doc, filename); } } if (historyContext != null) { @@ -636,4 +636,22 @@ public String getType() { public void setType(String fileType) { this.type = fileType; } + + private boolean getFallbackContext(List ret, Document doc, String filename) + throws ClassNotFoundException, IOException { + Definitions tags = null; + IndexableField tagsField = doc.getField(QueryBuilder.TAGS); + if (tagsField != null) { + tags = Definitions.deserialize(tagsField.binaryValue().bytes); + } + + Scopes scopes = null; + IndexableField scopesField = doc.getField(QueryBuilder.SCOPES); + if (scopesField != null) { + scopes = Scopes.deserialize(scopesField.binaryValue().bytes); + } + + return sourceContext.getContext(null, null, null, null, filename, tags, + false, false, ret, scopes); + } } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchFormatterBase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchFormatterBase.java new file mode 100644 index 00000000000..9c76f21712c --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchFormatterBase.java @@ -0,0 +1,119 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018-2019, Chris Fraire . + */ + +package org.opengrok.indexer.search; + +import org.apache.lucene.search.uhighlight.PassageFormatter; +import org.opengrok.indexer.analysis.Definitions; +import org.opengrok.indexer.analysis.Definitions.Tag; +import org.opengrok.indexer.search.context.PassageConverter; +import org.opengrok.indexer.search.context.PhraseHighlight; +import org.opengrok.indexer.util.SourceSplitter; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + +/** + * Represents an abstract base class for highlighting formatters. + */ +public abstract class SearchFormatterBase extends PassageFormatter { + + /** + * Matches a non-word character. + */ + private static final Pattern NONWORD_CHAR = Pattern.compile("(?U)\\W"); + + protected final PassageConverter cvt; + protected final List marks = new ArrayList<>(); + protected Definitions defs; + + /** + * Cached splitter, keyed by {@link #originalText}. + */ + protected SourceSplitter splitter; + private String originalText; + + /** + * Gets the optional definitions. + * @return the defs + */ + public Definitions getDefs() { + return defs; + } + + /** + * Sets the optional definitions. + * @param value definitions + */ + public void setDefs(Definitions value) { + this.defs = value; + } + + protected SearchFormatterBase(PassageConverter converter) { + cvt = converter; + } + + /** + * If the highlight is a sub-string wholly within the line, add it to the + * {@link #marks} list. + */ + protected void checkIfMark(String line, PhraseHighlight phi) { + if (phi.getLineStart() >= 0 && phi.getLineEnd() <= line.length()) { + marks.add(line.substring(phi.getLineStart(), phi.getLineEnd())); + } + } + + protected void updateOriginalText(String originalText) { + if (this.originalText == null || !this.originalText.equals(originalText)) { + splitter = new SourceSplitter(); + splitter.reset(originalText); + this.originalText = originalText; + } + } + + /** + * Search the cross product of {@code linetags} and {@code marks} for any + * mark that starts with a {@link Tag#symbol} and where any subsequent + * character is a non-word ({@code (?U)\W}) character. + * @return a defined instance or {@code null} + */ + protected Tag findTagForMark(List linetags, List marks) { + for (Tag tag : linetags) { + if (tag.type != null) { + for (String mark : marks) { + if (mark.startsWith(tag.symbol) && (mark.length() == tag.symbol.length() || + isNonWord(mark.charAt(tag.symbol.length())))) { + return tag; + } + } + } + } + return null; + } + + private static boolean isNonWord(char c) { + String cword = String.valueOf(c); + return NONWORD_CHAR.matcher(cword).matches(); + } +} diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/Context.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/Context.java index fe74f276477..c95e3110d62 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/Context.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/Context.java @@ -46,6 +46,7 @@ import org.opengrok.indexer.configuration.RuntimeEnvironment; import org.opengrok.indexer.logger.LoggerFactory; import org.opengrok.indexer.search.Hit; +import org.opengrok.indexer.search.HitFormatter; import org.opengrok.indexer.search.QueryBuilder; import org.opengrok.indexer.util.IOUtils; import org.opengrok.indexer.web.Util; @@ -202,8 +203,7 @@ public boolean getContext2(IndexSearcher searcher, formatter.setMoreUrl(moreURL); formatter.setMoreLimit(linelimit); - OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter(env, - searcher, anz); + OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter(searcher, anz); uhi.setBreakIterator(StrictLineBreakIterator::new); uhi.setFormatter(formatter); uhi.setTabSize(tabSize); @@ -228,6 +228,72 @@ public boolean getContext2(IndexSearcher searcher, return false; } + /** + * Look for context for this instance's initialized query in a search result + * {@link Document}, and output according to the parameters. + * @param searcher required search that produced the document + * @param docId document ID for producing context + * @param tabSize optional positive tab size that must accord with the value + * used when indexing or else postings may be wrongly shifted until + * re-indexing + * @param filename the source document filename + * @return a defined instance if hits are found or {@code null} + */ + public List getHits(IndexSearcher searcher, int docId, int tabSize, String filename) { + + if (isEmpty()) { + return null; + } + + Document doc; + try { + doc = searcher.doc(docId); + } catch (IOException e) { + LOGGER.log(Level.WARNING, "ERROR getting searcher doc(int)", e); + return null; + } + + Definitions tags = null; + try { + IndexableField tagsField = doc.getField(QueryBuilder.TAGS); + if (tagsField != null) { + tags = Definitions.deserialize(tagsField.binaryValue().bytes); + } + } catch (ClassNotFoundException | IOException e) { + LOGGER.log(Level.WARNING, "ERROR Definitions.deserialize(...)", e); + return null; + } + + /* + * UnifiedHighlighter demands an analyzer "even if in some + * circumstances it isn't used"; here it is not meant to be used. + */ + PlainAnalyzerFactory fac = PlainAnalyzerFactory.DEFAULT_INSTANCE; + AbstractAnalyzer anz = fac.getAnalyzer(); + + HitFormatter formatter = new HitFormatter(); + formatter.setDefs(tags); + formatter.setFilename(filename); + + OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter(searcher, anz); + uhi.setBreakIterator(StrictLineBreakIterator::new); + uhi.setFormatter(formatter); + uhi.setTabSize(tabSize); + + try { + List fieldList = qbuilder.getContextFields(); + String[] fields = fieldList.toArray(new String[0]); + return uhi.highlightFieldHits(fields, query, docId); + } catch (IOException e) { + LOGGER.log(Level.WARNING, "ERROR highlightFieldHits(...)", e); + // Continue below. + } catch (Throwable e) { + LOGGER.log(Level.SEVERE, "ERROR highlightFieldHits(...)", e); + throw e; + } + return null; + } + /** * Build the {@code queryAsURI} string that holds the query in a form * that's suitable for sending it as part of a URI. diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/ContextFormatter.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/ContextFormatter.java index e2f118c58c7..3a9dc039735 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/ContextFormatter.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/ContextFormatter.java @@ -18,25 +18,23 @@ */ /* - * Copyright (c) 2018, 2020, Chris Fraire . + * Copyright (c) 2018-2020, Chris Fraire . */ package org.opengrok.indexer.search.context; import java.io.IOException; -import java.util.ArrayList; import java.util.List; import java.util.SortedMap; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; -import java.util.regex.Pattern; import org.apache.lucene.search.uhighlight.Passage; import org.apache.lucene.search.uhighlight.PassageFormatter; -import org.opengrok.indexer.analysis.Definitions; import org.opengrok.indexer.analysis.Definitions.Tag; import org.opengrok.indexer.analysis.Scopes; import org.opengrok.indexer.logger.LoggerFactory; +import org.opengrok.indexer.search.SearchFormatterBase; import org.opengrok.indexer.util.SourceSplitter; import org.opengrok.indexer.util.StringUtils; import org.opengrok.indexer.web.HtmlConsts; @@ -46,22 +44,14 @@ * Represents a subclass of {@link PassageFormatter} that uses * {@link PassageConverter}. */ -public class ContextFormatter extends PassageFormatter { +public class ContextFormatter extends SearchFormatterBase { private static final String MORE_LABEL = "[all " + HtmlConsts.HELLIP + "]"; private static final Logger LOGGER = LoggerFactory.getLogger( ContextFormatter.class); - /** - * Matches a non-word character. - */ - private static final Pattern NONWORD_CHAR = Pattern.compile("(?U)\\W"); - - private final PassageConverter cvt; - private final List marks = new ArrayList<>(); private String url; - private Definitions defs; private Scopes scopes; /** @@ -71,18 +61,12 @@ public class ContextFormatter extends PassageFormatter { private String moreUrl; private int moreLimit; - /** - * Cached splitter, keyed by {@link #originalText}. - */ - private SourceSplitter splitter; - private String originalText; - /** * Initializes a formatter for the specified arguments. * @param args required instance */ - public ContextFormatter(ContextArgs args) { - this.cvt = new PassageConverter(args); + ContextFormatter(ContextArgs args) { + super(new PassageConverter(args)); } /** @@ -148,22 +132,6 @@ public void setMoreLimit(int value) { this.moreLimit = value; } - /** - * Gets the optional definitions. - * @return the defs - */ - public Definitions getDefs() { - return defs; - } - - /** - * Sets the optional definitions. - * @param value definitions - */ - public void setDefs(Definitions value) { - this.defs = value; - } - /** * Gets the optional scopes to use. * @return the scopes @@ -197,12 +165,7 @@ public Object format(Passage[] passages, String originalText) { throw new IllegalStateException("Url property is null"); } - if (this.originalText == null || !this.originalText.equals( - originalText)) { - splitter = new SourceSplitter(); - splitter.reset(originalText); - this.originalText = originalText; - } + updateOriginalText(originalText); FormattedLines res = new FormattedLines(); StringBuilder bld = new StringBuilder(); @@ -239,15 +202,7 @@ public Object format(Passage[] passages, String originalText) { PhraseHighlight phi = lhi.getMarkup(hioff++); - /* - * If the highlight is a sub-string wholly within the - * line, add it to the `marks' list. - */ - if (phi.getLineStart() >= 0 && - phi.getLineEnd() <= line.length()) { - marks.add(line.substring(phi.getLineStart(), - phi.getLineEnd())); - } + checkIfMark(line, phi); // Append any line text preceding the phrase highlight ... if (phi.getLineStart() >= 0) { @@ -272,7 +227,7 @@ public Object format(Passage[] passages, String originalText) { finishLine(bld, lhi.getLineno(), marks); // Regardless of true EOL, write a
. bld.append(HtmlConsts.BR); - /** + /* * Appending a LF here would hurt the more.jsp view, while * search.jsp (where getContext() does it) is indifferent -- so * skip it. @@ -352,30 +307,4 @@ private void writeTag(int lineOffset, Appendable dest, List marks) } } } - - /** - * Search the cross product of {@code linetags} and {@code marks} for any - * mark that starts with a {@link Tag#symbol} and where any subsequent - * character is a non-word ({@code (?U)\W}) character. - * @return a defined instance or {@code null} - */ - private Tag findTagForMark(List linetags, List marks) { - for (Tag tag : linetags) { - if (tag.type != null) { - for (String mark : marks) { - if (mark.startsWith(tag.symbol) && (mark.length() == - tag.symbol.length() || isNonWord( - mark.charAt(tag.symbol.length())))) { - return tag; - } - } - } - } - return null; - } - - private static boolean isNonWord(char c) { - String cword = String.valueOf(c); - return NONWORD_CHAR.matcher(cword).matches(); - } } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/OGKUnifiedHighlighter.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/OGKUnifiedHighlighter.java index 65c32efb45e..f9f0ab669f8 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/OGKUnifiedHighlighter.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/OGKUnifiedHighlighter.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Portions Copyright (c) 2018, Chris Fraire . + * Portions Copyright (c) 2018-2019, Chris Fraire . */ package org.opengrok.indexer.search.context; @@ -45,6 +45,7 @@ import org.opengrok.indexer.analysis.StreamSource; import org.opengrok.indexer.configuration.RuntimeEnvironment; import org.opengrok.indexer.logger.LoggerFactory; +import org.opengrok.indexer.search.Hit; import org.opengrok.indexer.search.QueryBuilder; import org.opengrok.indexer.util.IOUtils; import org.opengrok.indexer.web.Util; @@ -53,7 +54,7 @@ * Represents a subclass of {@link UnifiedHighlighter} with customizations for * OpenGrok. */ -public class OGKUnifiedHighlighter extends UnifiedHighlighter { +class OGKUnifiedHighlighter extends UnifiedHighlighter { private static final Logger LOGGER = LoggerFactory.getLogger( OGKUnifiedHighlighter.class); @@ -67,21 +68,14 @@ public class OGKUnifiedHighlighter extends UnifiedHighlighter { /** * Initializes an instance with * {@link UnifiedHighlighter#UnifiedHighlighter(org.apache.lucene.search.IndexSearcher, org.apache.lucene.analysis.Analyzer)} - * for the specified {@code indexSearcher} and {@code indexAnalyzer}, and - * stores the {@code env} for later use. - * @param env a required instance + * for the specified {@code indexSearcher} and {@code indexAnalyzer}. * @param indexSearcher a required instance * @param indexAnalyzer a required instance * @throws IllegalArgumentException if any argument is null */ - public OGKUnifiedHighlighter(RuntimeEnvironment env, - IndexSearcher indexSearcher, Analyzer indexAnalyzer) { + OGKUnifiedHighlighter(IndexSearcher indexSearcher, Analyzer indexAnalyzer) { super(indexSearcher, indexAnalyzer); - - if (env == null) { - throw new IllegalArgumentException("env is null"); - } - this.env = env; + env = RuntimeEnvironment.getInstance(); } /** @@ -109,9 +103,10 @@ public void setTabSize(int value) { } /** - * Transiently arranges that {@link #getIndexAnalyzer()} returns a file type - * name-specific analyzer during a subsequent call of - * {@link #highlightFieldsUnionWork(java.lang.String[], org.apache.lucene.search.Query, int, int)}. + * Calls + * {@link #highlightFieldsUnionWork(java.lang.String[], org.apache.lucene.search.Query, int, int)} + * after first transiently arranging that {@link #getIndexAnalyzer()} + * returns a file type name-specific analyzer the call. * @param fields a defined instance * @param query a defined instance * @param docId a valid document ID @@ -119,16 +114,9 @@ public void setTabSize(int value) { * @return a defined instance or else {@code null} if there are no results * @throws IOException if accessing the Lucene document fails */ - public String highlightFieldsUnion(String[] fields, Query query, + String highlightFieldsUnion(String[] fields, Query query, int docId, int lineLimit) throws IOException { - /** - * Setting fileTypeName has to happen before getFieldHighlighter() is - * called by highlightFieldsAsObjects() so that the result of - * getIndexAnalyzer() (if it is called due to requiring ANALYSIS) can be - * influenced by fileTypeName. - */ - Document doc = searcher.doc(docId); - fileTypeName = doc == null ? null : doc.get(QueryBuilder.TYPE); + prepareToGetFieldHighlighter(docId); try { return highlightFieldsUnionWork(fields, query, docId, lineLimit); } finally { @@ -136,6 +124,26 @@ public String highlightFieldsUnion(String[] fields, Query query, } } + /** + * Calls + * {@link #highlightFieldsUnionWork(java.lang.String[], org.apache.lucene.search.Query, int, int)} + * after first transiently arranging that {@link #getIndexAnalyzer()} + * returns a file type name-specific analyzer the call. + * @param fields a defined instance + * @param query a defined instance + * @param docId a valid document ID + * @return a defined instance or else {@code null} if there are no results + * @throws IOException if accessing the Lucene document fails + */ + List highlightFieldHits(String[] fields, Query query, int docId) throws IOException { + prepareToGetFieldHighlighter(docId); + try { + return highlightFieldHitsWork(fields, query, docId); + } finally { + fileTypeName = null; + } + } + /** * Calls * {@link #highlightFieldsAsObjects(java.lang.String[], org.apache.lucene.search.Query, int[], int[])}, @@ -187,6 +195,41 @@ protected String highlightFieldsUnionWork(String[] fields, Query query, return res.toString(); } + private List highlightFieldHitsWork(String[] fields, Query query, int docId) + throws IOException { + + int[] maxPassagesCopy = new int[fields.length]; + // Effectively unlimited. + Arrays.fill(maxPassagesCopy, Short.MAX_VALUE); + + List res = null; + Map mappedRes = highlightFieldsAsObjects(fields, query, + new int[] {docId}, maxPassagesCopy); + for (Object[] hitsObjs : mappedRes.values()) { + for (Object obj : hitsObjs) { + /* + * Empirical testing showed that the passage could be null if + * the original source text is not available to the highlighter. + */ + if (obj != null) { + if (!(obj instanceof List)) { + return res; + } + + if (res == null) { + res = new ArrayList<>(); + } + for (Object element : (List) obj) { + if (element instanceof Hit) { + res.add((Hit) element); + } + } + } + } + } + return res; + } + /** * Produces original text by reading from OpenGrok source content relative * to {@link RuntimeEnvironment#getSourceRootPath()} and returns the content @@ -326,4 +369,15 @@ private Reader getReader(InputStream in) throws IOException { BufferedReader bufrdr = new BufferedReader(bsrdr); return ExpandTabsReader.wrap(bufrdr, tabSize); } + + /** + * Setting fileTypeName has to happen before getFieldHighlighter() is + * called by highlightFieldsAsObjects() so that the result of + * getIndexAnalyzer() (if it is called due to requiring ANALYSIS) can be + * influenced by fileTypeName. + */ + private void prepareToGetFieldHighlighter(int docId) throws IOException { + Document doc = searcher.doc(docId); + fileTypeName = doc == null ? null : doc.get(QueryBuilder.TYPE); + } } diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/search/context/SearchAndContextFormatterTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/search/context/SearchAndContextFormatterTest.java index b91f5dc4ee7..8449424ab70 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/search/context/SearchAndContextFormatterTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/search/context/SearchAndContextFormatterTest.java @@ -135,7 +135,7 @@ private String[] getFirstFragments(SearchEngine instance) AbstractAnalyzer anz = fac.getAnalyzer(); ContextFormatter formatter = new ContextFormatter(args); - OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter(env, + OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter( instance.getSearcher(), anz); uhi.setBreakIterator(() -> new StrictLineBreakIterator()); uhi.setFormatter(formatter); diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/search/context/SearchAndContextFormatterTest2.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/search/context/SearchAndContextFormatterTest2.java index d82009c1e67..f547e6cbcd5 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/search/context/SearchAndContextFormatterTest2.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/search/context/SearchAndContextFormatterTest2.java @@ -195,7 +195,7 @@ private String[] getFirstFragments(SearchEngine instance) AbstractAnalyzer anz = fac.getAnalyzer(); ContextFormatter formatter = new ContextFormatter(args); - OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter(env, + OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter( instance.getSearcher(), anz); uhi.setBreakIterator(StrictLineBreakIterator::new); uhi.setFormatter(formatter); diff --git a/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java b/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java index fade00b1587..4fa3fe7a5f7 100644 --- a/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java +++ b/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java @@ -92,7 +92,8 @@ public SearchResult search( Map> hits = engine.search(req, projects, startDocIndex, maxResults) .stream() .collect(Collectors.groupingBy(Hit::getPath, - Collectors.mapping(h -> new SearchHit(h.getLine(), h.getLineno()), Collectors.toList()))); + Collectors.mapping(h -> new SearchHit(h.getLine(), h.getLineno(), + h.getLeft(), h.getRight()), Collectors.toList()))); long duration = Duration.between(startTime, Instant.now()).toMillis(); @@ -222,9 +223,15 @@ private static class SearchHit { private final String lineNumber; - private SearchHit(final String line, final String lineNumber) { + private final Integer left; + + private final Integer right; + + private SearchHit(String line, String lineNumber, Integer left, Integer right) { this.line = line; this.lineNumber = lineNumber; + this.left = left; + this.right = right; } public String getLine() { @@ -234,6 +241,14 @@ public String getLine() { public String getLineNumber() { return lineNumber; } + + public Integer getLeft() { + return left; + } + + public Integer getRight() { + return right; + } } }