Skip to content

Commit

Permalink
Wikidata Qleverfile without text index
Browse files Browse the repository at this point in the history
For our own endpoint, we add text from the English Wikipedia. Since that
text is not part of the Wikidata dataset, we remove it from the official
Qleverfile for Wikidata.
  • Loading branch information
Hannah Bast committed Nov 25, 2024
1 parent c6f9643 commit f01639e
Showing 1 changed file with 1 addition and 3 deletions.
4 changes: 1 addition & 3 deletions src/qlever/Qleverfiles/Qleverfile.wikidata
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities
GET_DATA_CMD = curl -LRC - -O ${GET_DATA_URL}/latest-all.ttl.bz2 -O ${GET_DATA_URL}/latest-lexemes.ttl.bz2 2>&1 | tee wikidata.download-log.txt && curl -sL ${GET_DATA_URL}/dcatap.rdf | docker run -i --rm -v $$(pwd):/data stain/jena riot --syntax=RDF/XML --output=NT /dev/stdin > dcatap.nt
DATE_WIKIDATA = $$(date -r latest-all.ttl.bz2 +%d.%m.%Y || echo "NO_DATE")
DATE_WIKIPEDIA = $$(date -r wikipedia-abstracts.nt +%d.%m.%Y || echo "NO_DATE")
DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${DATE_WIKIDATA}) + English Wikipeda abstracts (version ${DATE_WIKIPEDIA}, available via schema:description)
TEXT_DESCRIPTION = All English and German literals + all sentences from the English Wikipedia (version ${DATE_WIKIPEDIA}), use with FILTER KEYWORDS(...)
DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${DATE_WIKIDATA})

[index]
INPUT_FILES = latest-all.ttl.bz2 latest-lexemes.ttl.bz2 dcatap.nt
Expand All @@ -26,7 +25,6 @@ MULTI_INPUT_JSON = [{ "cmd": "lbzcat -n 4 latest-all.ttl.bz2", "format": "ttl",
{ "cmd": "cat dcatap.nt", "format": "nt", "parallel": "false" }]
SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
STXXL_MEMORY = 10G
TEXT_INDEX = from_text_records

[server]
PORT = 7001
Expand Down

0 comments on commit f01639e

Please sign in to comment.