From 9a7e1bc15341c4d9f6dcc41319e2b5e8b1060ce0 Mon Sep 17 00:00:00 2001 From: CunoD Date: Tue, 20 Jun 2017 11:23:33 +0200 Subject: [PATCH] Python interface Python interface to Semantic Knowledge Graph including example. --- python-interface/README.txt | 93 ++++++++++++++++++++++++++++++++++++ python-interface/example.csv | 11 +++++ python-interface/schema.xml | 39 +++++++++++++++ python-interface/sorl_skg.py | 56 ++++++++++++++++++++++ 4 files changed, 199 insertions(+) create mode 100644 python-interface/README.txt create mode 100644 python-interface/example.csv create mode 100644 python-interface/schema.xml create mode 100644 python-interface/sorl_skg.py diff --git a/python-interface/README.txt b/python-interface/README.txt new file mode 100644 index 0000000..1777999 --- /dev/null +++ b/python-interface/README.txt @@ -0,0 +1,93 @@ +Explanation for running sorl_skg.py on Semantic Knowledge Graph + +Cuno Duursma +cuno.duursma@cgi.com + +Tested on Windows 7 Python 2.7 Solr 5.1.0 + +schema.xml should be copied to: + semantic-knowledge-graph-master\deploy\solr\server\solr\knowledge-graph\conf\ + If you change schema.xml, make sure to remove documents and restart solr. + + Delete all (!) solr Knowledge Graph data (paste URL in browser): + http://localhost:8983/solr/knowledge-graph/update?stream.body=<delete><query>*:*</query></delete> + Commit delete (paste URL in browser): + http://localhost:8983/solr/knowledge-graph/update?stream.body=<commit/> + + Restarting solr: + Open command window: + change to directory semantic-knowledge-graph-master\deploy\solr\server + execute: + java -DSTOP.PORT=7983 -DSTOP.KEY=solrrocks -jar start.jar --stop + Ports and Key may vary: see Solr console window in browser: http://localhost:8983/solr/#/ + Go to semantic-knowledge-graph-master\deploy + source restart-solr.sh (e.g. from bash) + Just doing the restart using restart-solr.sh did not work for me. 
+ + +Running sorl_skg.py should produce: + +Knowledge Graph feed result: + +041 + + +Knowledge Graph query: +{ + "min_popularity": 0.0, + "compare": [ + { + "sort": "relatedness", + "limit": 5, + "type": "col1", + "discover_values": "true" + } + ], + "queries": [ + "col1:\"whale\"" + ] +} +Knowledge Graph results: +{ + "data": [ + { + "values": [ + { + "foreground_popularity": 400000.0, + "popularity": 400000.0, + "name": "whale", + "background_popularity": 400000.0, + "relatedness": 0.02618 + }, + { + "foreground_popularity": 200000.0, + "popularity": 200000.0, + "name": "arctic", + "background_popularity": 200000.0, + "relatedness": 0.0163 + }, + { + "foreground_popularity": 200000.0, + "popularity": 200000.0, + "name": "dolphin", + "background_popularity": 200000.0, + "relatedness": 0.0163 + }, + { + "foreground_popularity": 100000.0, + "popularity": 100000.0, + "name": "sea", + "background_popularity": 100000.0, + "relatedness": 0.01097 + } + ], + "type": "col1" + } + ] +} + +Issues: + +- Notice that the popularity results are not converted correctly from JSON (e.g. 
# NOTE: the comment lines down to the "@@ -0,0 +1,56 @@" marker are the
# non-Python patch content (end of README.txt, the example.csv and schema.xml
# hunks, and the diff header of this script) that shares these source lines
# with the script. It is preserved here, one patch line per comment line.
#
# 400000.0 should be 4.0)
# +- Relatedness figures are very low for a z-score
# +
# diff --git a/python-interface/example.csv b/python-interface/example.csv
# new file mode 100644
# index 0000000..1333876
# --- /dev/null
# +++ b/python-interface/example.csv
# @@ -0,0 +1,11 @@
# +id,col1,col2,col3
# +1,dog cat,tone,ten
# +2,lion zebra,tone,ten
# +3,whale dolphin,tone,ten
# +4,swan goose,tone,ten
# +5,dog home,tone,ten
# +6,cat home,tone,ten
# +7,lion zebra zoo,tone,ten
# +8,whale dolphin sea,tone,ten
# +9,whale arctic,tone,ten
# +10,whale arctic,tone,ten
# diff --git a/python-interface/schema.xml b/python-interface/schema.xml
# new file mode 100644
# index 0000000..b38bc35
# --- /dev/null
# +++ b/python-interface/schema.xml
# @@ -0,0 +1,39 @@
# + + + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
# \ No newline at end of file
# diff --git a/python-interface/sorl_skg.py b/python-interface/sorl_skg.py
# new file mode 100644
# index 0000000..225a35a
# --- /dev/null
# +++ b/python-interface/sorl_skg.py
# @@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 13 15:08:17 2017

@author: cduursma
cuno.duursma@cgi.com
Licensed under Apache License 2.0

Python interface to the Semantic Knowledge Graph in Solr
https://github.com/careerbuilder/semantic-knowledge-graph
Tested on Windows 7 Python 2.7 Solr 5.1.0

"""

import json

import requests

# Global Knowledge Graph Query settings
url_query = "http://localhost:8983/solr/knowledge-graph/rel"
url_update = "http://localhost:8983/solr/knowledge-graph/update"
headers_query = {"content-type": "application/json", "Accept-Charset": "UTF-8"}
headers_update = {"Content-type": "text/csv", "Accept-Charset": "UTF-8"}
params_update = {"commit": "true"}
# This name used to be bound to the bytes of "rr_total2.csv", read at import
# time. That file is not shipped with the script, so merely importing or
# running the module failed. The name is kept (as None) only so existing
# references to it still resolve; callers pass their own data to feed_skg().
data_update = None

# Example finding "whale" in "col1"
query_content = {
    "queries": ["col1:\"whale\""],
    "min_popularity": 0.0,
    "compare": [
        {
            "type": "col1",
            "limit": 5,
            "sort": "relatedness",
            "discover_values": "true",
        }
    ],
}


def feed_skg(data_update):
    """Feed the knowledge graph with CSV data.

    data_update -- the CSV content to index, e.g. the bytes read from a file
        opened in binary mode. It must match the Knowledge Graph schema.xml.

    Returns the requests response of the update call (url_update, sent with
    params_update and headers_update).
    """
    # The CSV travels in the request body, so the request is a POST; a GET
    # carrying a body is not reliably honoured by servers and proxies.
    return requests.post(url_update, params=params_update,
                         headers=headers_update, data=data_update)


def query_skg(query):
    """Query the knowledge graph.

    query -- a Python dict describing the query; requests serializes it to
        JSON for the request body.

    Returns the requests response of the query call (url_query).
    """
    return requests.post(url_query, headers=headers_query, json=query)


def main():
    """Feed example.csv, run the example query and print both results."""
    # Context manager so the file handle is closed once the data is read.
    with open("example.csv", "rb") as csv_file:
        feed_response = feed_skg(csv_file.read())
    print("Knowledge Graph feed result: {0}".format(feed_response.text))
    print("Knowledge Graph query:")
    print(json.dumps(query_content, indent=2))
    query_response = query_skg(query_content)
    parsed = json.loads(query_response.text)
    print("Knowledge Graph results:")
    print(json.dumps(parsed, indent=2, sort_keys=False))


if __name__ == '__main__':
    main()