Skip to content

Commit

Permalink
Preparing for release
Browse files Browse the repository at this point in the history
  * updated README
  * new version number
  * test all using paging now
  * supports opensearch url templates
  * improved error messages
  * lenient accept headers default
  * removed proxy and anonymous (moved to client)
  • Loading branch information
searsia committed Dec 8, 2017
1 parent b86f8f4 commit e950809
Show file tree
Hide file tree
Showing 21 changed files with 142 additions and 275 deletions.
19 changes: 11 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@ Searsia Server
==============
http://searsia.org

Usage:
Usage:
+ Build with: `mvn package`
+ Run with: `java -jar target/searsiaserver.jar`
+ Run with: `java -jar target/searsiaserver.jar -m <url>`
+ Done.

Connect to the server with the [Federated Web Search Client][1].
More information can be found in the [Searsia Documentation][2],
or you may ask a question under [Searsia Server Issues][3].
The option `-m` is required: It connects your server to an
existing Searsia server, see [Searsia server options][1].
Connect to your server with the [Federated Web Search Client][2].
More information can be found in the [Searsia Documentation][3],
or you may ask a question under [Searsia Server Issues][4].

[1]: http://github.com/searsia/searsiaclient "Searsia Client"
[2]: http://searsia.org "Searsia Documentation"
[3]: http://github.com/searsia/searsiaserver/issues "Issues"
[1]: http://searsia.org/start.html#server
[2]: http://github.com/searsia/searsiaclient "Searsia Client"
[3]: http://searsia.org "Searsia Documentation"
[4]: http://github.com/searsia/searsiaserver/issues "Issues"
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.searsia</groupId>
<artifactId>searsiaserver</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
<prerequisites>
<maven>3.0</maven>
</prerequisites>
Expand Down
83 changes: 60 additions & 23 deletions src/main/java/org/searsia/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import org.apache.log4j.Appender;
Expand Down Expand Up @@ -159,9 +161,9 @@ private static String removeFileNameUri(String uri) {
private static String normalizedUriToTemplate(String uri, String rid) {
if (uri != null) {
if (uri.endsWith("/") ) {
uri += rid + "?q={q}";
} else if (!uri.contains("{q")) { // check for tests on searsia.org
uri += "?q={q}";
uri += rid + "?q={searchTerms}&page={startPage?}";
} else if (!uri.contains("{q") && !uri.contains("{searchTerms")) { // check for tests on searsia.org
uri += "?q={searchTerms}&page={startPage?}";
}

}
Expand Down Expand Up @@ -207,25 +209,45 @@ public static String getHashString(String inputString) {

private static void testAll(Resource mother, SearchResult result, Boolean isQuiet) throws SearchException {
int nrFailed = 0;
for (Hit hit: result.getHits()) {
if (hit.getRid() != null) {
try {
Resource engine = mother.searchResource(hit.getRid());
testMother(engine, "none", isQuiet);
} catch (Exception e) {
nrFailed += 1;
printMessage("Test failed: " + e.getMessage(), isQuiet);
boolean isDone = false;
int startPage = mother.getIndexOffset();
Map<String, Boolean> tested = new HashMap<String, Boolean>();
tested.put(mother.getId(), true);
while (!result.getHits().isEmpty() && !isDone) {
isDone = true;
for (Hit hit: result.getHits()) {
String rid = hit.getRid();
if (rid != null && !tested.containsKey(rid)) {
tested.put(rid, true);
isDone = false;
Resource engine = null;
try {
engine = mother.searchResource(hit.getRid());
testEngine(engine, "none", isQuiet);
} catch (Exception e) {
nrFailed += 1;
if (engine == null) { // resource not found, so test did not even start
printMessage("Testing: " + hit.getRid(), isQuiet);
}
printMessage("Test failed: " + e.getMessage(), isQuiet);
}
}
}
}
}
startPage += 1;
try {
result = mother.search(mother.getTestQuery(), "all", startPage);
} catch (Exception e) {
throw new SearchException("Mother error: " + e.getMessage());
}
}
if (nrFailed > 0) {
throw new SearchException(nrFailed + " engines failed.");
}
}


private static void testMother(Resource mother, String debugInfo, Boolean isQuiet) throws SearchException {
printMessage("Testing: " + mother.getName() + " (" + mother.getId() + ")", isQuiet);
private static void testEngine(Resource mother, String debugInfo, Boolean isQuiet) throws SearchException {
printMessage("Testing: " + mother.getId() + " (" + mother.getName() + ")", isQuiet);
SearchResult result = null;
result = mother.search(mother.getTestQuery(), debugInfo);
if (!isQuiet) {
Expand All @@ -249,12 +271,22 @@ private static void testMother(Resource mother, String debugInfo, Boolean isQuie
throw new SearchException("No results for test query." + tip);
}
if (result.getHits().size() < 10) {
printMessage("Warning: less than 10 results for query: " + result.getQuery() + "; see \"testquery\" or \"rerank\".", isQuiet);
printMessage("Warning: less than 10 results for query '" + result.getQuery() + "'; see \"testquery\" or \"rerank\".", isQuiet);
} else if (result.getHits().size() > 49) {
printMessage("Warning: more than 49 results for query: " + result.getQuery(), isQuiet);
printMessage("Warning: more than 49 results for query '" + result.getQuery() + "'", isQuiet);
}
if (debugInfo.equals("all")) {
testAll(mother, result, isQuiet);
String rid = null;
if (result.getResource() != null) {
rid = result.getResource().getId();
}
if (rid != null && rid.equals(mother.getId())) { // do not trust resources if the mother API provides another ID than the mother ID
testAll(mother, result, isQuiet);
} else if (rid == null ){
printMessage("Warning: no resources available.", isQuiet);
} else {
printMessage("Warning: no resources. ID '" + mother.getId() + "' changed to '" + rid + "'", isQuiet);
}
}
}

Expand Down Expand Up @@ -317,15 +349,20 @@ public static void main(String[] args) {
if (!options.getMotherTemplate().matches(".*" + mother.getId() + "[^/]*$")) {
fatalError("API Template (" + options.getMotherTemplate() + "): file name must contain id (" + mother.getId() +")");
}
if (version != null && !version.startsWith("v1")) {
fatalError("Wrong major Searsia version " + version + ": Must be v1.0.0 or higher.");
if (version == null || !version.startsWith("v1")) {
fatalError("Wrong major Searsia version. Must be v1.x.x.");
}


if (mother.getAPITemplate() == null) {
mother.setUrlAPITemplate(options.getMotherTemplate());
} else if (!sameTemplates(mother.getAPITemplate(), options.getMotherTemplate(), mother.getId())) {
printMessage("Warning: Mother changed to " + mother.getAPITemplate(), options.isQuiet());
} else {
if (!sameTemplates(mother.getAPITemplate(), options.getMotherTemplate(), mother.getId())) {
printMessage("Warning: Mother changed to " + mother.getAPITemplate(), options.isQuiet());
}
if (mother.getAPITemplate().contains("{q")) {
printMessage("Warning: API Template parameter {q} is deprecated. Use {searchTerms}.", options.isQuiet());
}
}
myself = mother.getLocalResource();
String fileName = myself.getId() + "_" + getHashString(mother.getAPITemplate());
Expand All @@ -340,7 +377,7 @@ public static void main(String[] args) {
path = tmpDir;
}
try {
testMother(mother, options.getTestOutput(), options.isQuiet());
testEngine(mother, options.getTestOutput(), options.isQuiet());
printMessage("Test succeeded.", options.isQuiet());
} catch (Exception e) {
fatalError("Test failed: " + e.getLocalizedMessage());
Expand Down
36 changes: 4 additions & 32 deletions src/main/java/org/searsia/SearsiaOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;

import org.apache.log4j.Level;
import org.apache.commons.cli.DefaultParser;
Expand All @@ -36,7 +35,6 @@
public class SearsiaOptions {

/* See setDefaults() below */
private Boolean anonymous;
private String test;
private Boolean quiet;
private Boolean help;
Expand All @@ -58,19 +56,18 @@ public class SearsiaOptions {
*/
public SearsiaOptions(String[] args) throws IllegalArgumentException, MalformedURLException {
Options options = new Options();
options.addOption("a", "anonymous",false, "Anonymous traffic by proxying all calls."); // TODO
options.addOption("c", "cache", true, "Set cache size (integer: number of result pages).");
options.addOption("d", "dontshare",false, "Do not share resource definitions."); // TODO
options.addOption("d", "dontshare",false, "Do not share resource definitions.");
options.addOption("e", "export", false, "Export index to stdout and exit.");
options.addOption("h", "help", false, "Show help.");
options.addOption("i", "interval", true, "Set poll interval (integer: in seconds).");
options.addOption("l", "log", true, "Set log level (0=off, 1=error, 2=warn=default, 3=info, 4=debug).");
options.addOption("m", "mother", true, "Set url of mother's web service end point.");
options.addOption("m", "mother", true, "Set url of mother's api web service end point.");
options.addOption("n", "nohealth", false, "Do not share health report.");
options.addOption("p", "path", true, "Set directory path to store the index.");
options.addOption("q", "quiet", false, "No output to console.");
options.addOption("t", "test", true, "Print test output and exit (string: 'json', 'xml', 'response', 'all').");
options.addOption("u", "url", true, "Set url of my web service endpoint.");
options.addOption("u", "url", true, "Set url of my api web service endpoint.");
setDefaults();
parse(options, args);
if (myURI == null) {
Expand All @@ -85,24 +82,7 @@ public SearsiaOptions() {
setDefaults();
}

/**
 * Derives a directory name from the mother's API template.
 *
 * <p>Optional template parameters of the form {@code {name?}} are stripped
 * first so that the remainder can be parsed as a URL. The file part of the
 * URL path is then dropped and only the last remaining directory is kept,
 * followed by a slash.
 *
 * @return the last directory of the template's path followed by {@code "/"},
 *         or {@code "searsia"} when the template is not a valid URL or its
 *         path contains no slash
 */
private String rootDir() {
    final String fallback = "searsia";
    // Strip optional parameters such as "{startPage?}" before parsing.
    String template = getMotherTemplate().replaceAll("\\{[0-9A-Za-z\\-_]+\\?\\}", "");
    String urlPath;
    try {
        urlPath = new URL(template).getPath();
    } catch (MalformedURLException e) {
        // Deliberately best-effort: an unparsable template yields the fallback name.
        return fallback;
    }
    if (urlPath == null || !urlPath.contains("/")) {
        return fallback;
    }
    // Drop the last path segment (the file name) ...
    String withoutFile = urlPath.replaceAll("\\/[^\\/]*$", "");
    // ... then drop every leading directory, keeping only the last one.
    String lastDirectory = withoutFile.replaceAll("^.+\\/", "");
    return lastDirectory + "/";
}

private void setDefaults() {
anonymous = false;
test = null; // no test
help = false;
quiet = false;
Expand Down Expand Up @@ -162,9 +142,6 @@ private void parse(Options options, String[] args) throws IllegalArgumentExcepti
} catch (ParseException e) {
throw new IllegalArgumentException(e.getMessage() + " (use '-h' for help)");
}
if (cmd.hasOption("a")) {
anonymous = true;
}
if (cmd.hasOption("c")) {
cacheSize = new Integer(cmd.getOptionValue("c"));
if (cacheSize < 30) {
Expand Down Expand Up @@ -215,7 +192,7 @@ private void parse(Options options, String[] args) throws IllegalArgumentExcepti
}
if (cmd.hasOption("h") || cmd.getArgs().length < 0 || !cmd.hasOption("m")) {
if (!cmd.hasOption("m")) {
System.out.println("Please provide mother's url template (use '-m').");
System.out.println("Please provide mother's api url template (use '-m').");
}
help(options);
help = true;
Expand Down Expand Up @@ -291,10 +268,6 @@ public String getIndexPath() {
return indexPath;
}

/**
 * Returns the value of the {@code anonymous} option.
 *
 * <p>{@code setDefaults()} initialises the flag to {@code false} and
 * {@code parse()} sets it to {@code true} when the command line carries
 * option {@code a}.
 *
 * @return the current value of the anonymous flag
 */
public Boolean isAnonymous() {
return anonymous;
}

/**
 * Returns the value of the {@code quiet} option.
 *
 * <p>{@code setDefaults()} initialises the flag to {@code false}.
 * NOTE(review): presumably set from the {@code q} command-line option
 * ("No output to console.") - the assignment is not visible here; confirm.
 *
 * @return the current value of the quiet flag
 */
public Boolean isQuiet() {
return quiet;
}
Expand Down Expand Up @@ -325,7 +298,6 @@ public String toString() {
result += "\n Poll Interval = " + getPollInterval();
result += "\n Cache Size = " + getCacheSize();
result += "\n Test Output = " + getTestOutput();
result += "\n Anonymous = " + isAnonymous();
result += "\n Do Not Share = " + isNotShared();
result += "\n No Health Rep.= " + isNoHealthReport();
return result;
Expand Down
37 changes: 28 additions & 9 deletions src/main/java/org/searsia/engine/Resource.java
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public class Resource implements Comparable<Resource> {
private final static int defaultPER = 86400000; // unit: milliseconds (86400000 milliseconds is one day)
private final static DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT);

// TODO: private static final Pattern queryPattern = Pattern.compile("\\{q\\??\\}");
// TODO: private static final Pattern queryPattern = Pattern.compile("\\{searchTerms\\??\\}");

// data to be set by JSON
private String id = null;
Expand Down Expand Up @@ -293,11 +293,15 @@ public SearchResult randomSearch() throws SearchException {


public SearchResult search(String query) throws SearchException {
return search(query, null);
return search(query, null, null);
}


/**
 * Searches this resource without specifying a start page.
 *
 * <p>Delegates to {@code search(query, debug, startPage)} with a
 * {@code null} start page.
 *
 * @param query the query string, passed through unchanged
 * @param debug debug setting, passed through unchanged
 * @return the result of the three-argument {@code search}
 * @throws SearchException if the delegated search fails
 */
public SearchResult search(String query, String debug) throws SearchException {
return search(query, debug, null);
}

public SearchResult search(String query, String debug, Integer startPage) throws SearchException {
SearchResult result;
try {
if (rateLimitReached()) {
Expand All @@ -306,7 +310,7 @@ public SearchResult search(String query, String debug) throws SearchException {
if (this.urlAPITemplate == null) {
throw new SearchException("No API Template");
}
String url = fillTemplate(this.urlAPITemplate, URLEncoder.encode(query, "UTF-8"));
String url = fillTemplate(this.urlAPITemplate, URLEncoder.encode(query, "UTF-8"), startPage);
String postString = "";
String postQuery;
if (this.postString != null && !this.postString.equals("")) {
Expand All @@ -321,7 +325,7 @@ public SearchResult search(String query, String debug) throws SearchException {
} else {
postQuery = URLEncoder.encode(query, "UTF-8");
}
postString = fillTemplate(this.postString, postQuery);
postString = fillTemplate(this.postString, postQuery, startPage);
}
String page = getCompletePage(url, postString, this.headers);
if (this.mimeType != null && this.mimeType.equals(SearchResult.SEARSIA_MIME_TYPE)) {
Expand Down Expand Up @@ -376,7 +380,7 @@ public Resource searchResource(String resourceid) throws SearchException {
try {
String newRid = URLEncoder.encode(resourceid, "UTF-8");
url = url.substring(0, lastIndex) + url.substring(lastIndex).replaceFirst(rid, newRid);
url = url.replaceAll("\\{[0-9A-Za-z\\-_]+\\?\\}|\\{q\\}", ""); // remove optional parameters and query
url = fillTemplate(url, "", null);
String jsonPage = getCompletePage(url, this.postString, this.headers);
JSONObject json = new JSONObject(jsonPage);
if (json.has("resource")) {
Expand Down Expand Up @@ -462,7 +466,7 @@ private SearchResult xpathSearch(String url, String page, String debug)
}
XPathFactory xFactory = XPathFactory.newInstance();
XPath xpath = xFactory.newXPath();
NodeList xmlNodeList = (NodeList) xpath.evaluate(itemXpath, document, XPathConstants.NODESET);
NodeList xmlNodeList = (NodeList) xpath.evaluate(this.itemXpath, document, XPathConstants.NODESET);
for (int i = 0; i < xmlNodeList.getLength() && i < 30; i++) {
Node item = xmlNodeList.item(i);
result.addHit(extractHit(item));
Expand Down Expand Up @@ -525,11 +529,22 @@ private Document parseDocumentXML(String xmlString) {
}

/**
 * Fills a template without specifying a start page.
 *
 * <p>Delegates to {@code fillTemplate(template, query, startPage)} with a
 * {@code null} start page.
 *
 * @param template the template to fill, passed through unchanged
 * @param query the query value to substitute, passed through unchanged
 * @return the result of the three-argument {@code fillTemplate}
 * @throws SearchException if the delegated call fails
 */
private String fillTemplate(String template, String query) throws SearchException {
return fillTemplate(template, query, null);
}

private String fillTemplate(String template, String query, Integer startPage) throws SearchException {
String url = template;
for (String param: getPrivateParameterKeys()) {
url = url.replaceAll("\\{" + param + "\\??\\}", getPrivateParameter(param));
}
url = url.replaceAll("\\{q\\??\\}", query);
url = url.replaceAll("\\{searchTerms\\??\\}", query); // opensearch standard
url = url.replaceAll("\\{q\\??\\}", query); // old Searsia
if (startPage == null) {
startPage = this.getIndexOffset();
url = url.replaceAll("\\{startPage\\}", startPage.toString());
} else {
url = url.replaceAll("\\{startPage\\??\\}", startPage.toString());
}
url = url.replaceAll("\\{[0-9A-Za-z\\-_]+\\?\\}", ""); // remove optional parameters
if (url.matches(".*\\{[0-9A-Za-z\\-_]+\\}.*")) {
String param = url.substring(url.indexOf("{"), url.indexOf("}") + 1);
Expand All @@ -544,6 +559,7 @@ private String fillTemplate(String template, String query) throws SearchExceptio

private SearchException createPrivateSearchException(Exception e) {
String message = e.toString();
message = message.replaceAll("java\\.[a-z]+\\.", "");
for (String param: getPrivateParameterKeys()) {
message = message.replaceAll(getPrivateParameter(param), "{" + param + "}");
}
Expand All @@ -570,11 +586,10 @@ private boolean rateLimitReached() {
}
}


private URLConnection setConnectionProperties(URL url, Map<String, String> headers) throws IOException {
URLConnection connection = url.openConnection();
connection.setRequestProperty("User-Agent", "Searsia/1.0");
connection.setRequestProperty("Accept", this.mimeType); //TODO: "*/*"
connection.setRequestProperty("Accept", this.mimeType + "; q=1.0, */*; q=0.5");
connection.setRequestProperty("Accept-Language", "en-US,en;q=0.5"); // TODO: from browser?
for (Map.Entry<String, String> entry : headers.entrySet()) {
String value = entry.getValue();
Expand Down Expand Up @@ -851,6 +866,10 @@ public Long getLastUsedSecondsAgo() {
return secondsAgo(this.lastUsed);
}

/**
 * Returns the number of the first result page of this resource.
 *
 * @return always {@code 1} for now; the value is not yet read from the template
 */
public int getIndexOffset() {
return 1; // TODO: indexOffset of opensearch url template syntax
}

/**
 * Reports whether this resource is considered healthy.
 *
 * <p>The resource is healthy when {@code lastUsedOk} is not smaller than
 * {@code lastUsedError}, or when {@code nrOfError} is zero.
 *
 * @return {@code true} if either condition holds, {@code false} otherwise
 */
public boolean isHealthy() {
    // Compare the two "last used" values first, in the same order as the
    // original short-circuit expression.
    if (this.lastUsedOk >= this.lastUsedError) {
        return true;
    }
    return this.nrOfError == 0;
}
Expand Down
Loading

0 comments on commit e950809

Please sign in to comment.