A set of quality control and maintenance queries for the Wikidata SPARQL endpoint (query.wikidata.org)
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
# QC check: list every item that carries a P351 value (bound to ?w) and is
# also typed, via P31, as wd:Q4167410.
# NOTE(review): P351 is presumably "Entrez Gene ID" and Q4167410 presumably
# "Wikimedia disambiguation page" -- confirm against Wikidata.
SELECT ?gene ?w
WHERE {
  ?gene wdt:P351 ?w ;
        wdt:P31  wd:Q4167410 .
}
Retrieve all Wikidata items which share a MeSH ID. That can point to duplicate human disease items in Wikidata
# Find items that share a MeSH ID (P486) with at least one other item.
#
# Output: one row per (?d2, ?mesh) pair. ?mesh_count is the total number of
# distinct items carrying that MeSH ID: the other items (?d) plus ?d2 itself.
#
# Notes:
#  * COUNT(DISTINCT ?d) is used so that an item holding the same MeSH ID in
#    more than one statement is not counted several times.
#  * There is deliberately no HAVING clause. The FILTER below guarantees that
#    every group contains at least one other item, so ?mesh_count is always
#    >= 2. Referring to the SELECT alias ?mesh_count inside HAVING is also not
#    portable: in the SPARQL 1.1 algebra the alias is bound only after HAVING
#    has been evaluated.
#  * The p: and ps: prefixes are not declared in this file; the query relies
#    on the endpoint predefining them (query.wikidata.org does).
SELECT ?d2 ?mesh (COUNT(DISTINCT ?d) + 1 AS ?mesh_count)
WHERE {
  ?d  p:P486/ps:P486 ?mesh .
  ?d2 p:P486/ps:P486 ?mesh .
  # Self-join guard: keep only pairs of two different items.
  FILTER (?d != ?d2)
}
GROUP BY ?d2 ?mesh
ORDER BY ?mesh ?d2
# QC check: list items whose P686 value (bound to ?go) is not a well-formed
# Gene Ontology ID.
#
# A well-formed ID is the upper-case prefix "GO:" followed by exactly seven
# digits, e.g. "GO:0008150". The pattern is anchored at both ends and is
# case-sensitive, so values such as "GO:1abc", "GO:123" or "go:0008150" are
# reported as malformed as well.
SELECT DISTINCT ?gene ?go
WHERE {
  ?gene wdt:P686 ?go .
  FILTER (!REGEX(?go, "^GO:[0-9]{7}$"))
}
# QC check: list items that have a P352 value (bound to ?up), whose P703 value
# is wd:Q5 or wd:Q83310, and that have no P702 statement at all.
# NOTE(review): P352 / P703 / P702 are presumably "UniProt protein ID",
# "found in taxon" and "encoded by"; Q83310 presumably "house mouse" --
# confirm against Wikidata.
# NOTE(review): wd:Q5 is "human" as a class of persons; current Wikidata
# usually models the taxon as Homo sapiens (Q15978631) -- confirm whether Q5
# is still the intended value here.
SELECT ?p ?up
WHERE {
  VALUES ?taxon { wd:Q5 wd:Q83310 }
  ?p wdt:P352 ?up ;
     wdt:P703 ?taxon .
  FILTER NOT EXISTS { ?p wdt:P702 ?enc }
}
# QC check: list items whose P703 value is wd:Q83310 and that nevertheless
# carry a P353 value (bound to ?hgnc_symbol).
# NOTE(review): P353 is presumably "HGNC gene symbol", P703 "found in taxon"
# and Q83310 "house mouse"; HGNC symbols are assigned to human genes, so any
# hit would be suspicious -- confirm intent.
SELECT ?g ?hgnc_symbol
WHERE {
  ?g wdt:P703 wd:Q83310 ;
     wdt:P353 ?hgnc_symbol .
}
Get human genes with an HGNC symbol but without an HGNC ID (this suggests that either the HGNC symbol is wrong or the HGNC ID is really missing)
# QC check: list items that have an HGNC symbol (P353) but no HGNC ID (P354).
# No taxon restriction is applied in this query.
SELECT ?g ?hgnc_symbol
WHERE {
  ?g wdt:P353 ?hgnc_symbol .
  # Drop every item that has any P354 statement.
  MINUS { ?g wdt:P354 ?hgnc_id . }
}
Get all compounds with CAS numbers which are not 'instance of' or 'subclass of' a chemical compound
# QC check: list items that have a CAS number (P231, bound to ?pc) but are
# neither a direct 'subclass of' (P279) nor a direct 'instance of' (P31)
# chemical compound (wd:Q11173). Only direct statements are tested; indirect
# subclass chains are not followed.
SELECT ?cmpnd ?pc
WHERE {
  ?cmpnd wdt:P231 ?pc .
  FILTER NOT EXISTS { ?cmpnd wdt:P279 wd:Q11173 . }
  FILTER NOT EXISTS { ?cmpnd wdt:P31  wd:Q11173 . }
}
This suggests that the gene does not have a real HGNC symbol, and that the one on the human gene item is actually wrong.
# QC check: list items that
#   - have a P353 value (bound to ?gs),
#   - have P703 = wd:Q5,
#   - have no P354 statement, and
#   - do not also have P703 = wd:Q83310.
# NOTE(review): P353 / P354 are presumably "HGNC gene symbol" / "HGNC ID",
# P703 "found in taxon", Q5 "human" and Q83310 "house mouse" -- confirm
# against Wikidata.
SELECT ?x ?gs
WHERE {
  ?x wdt:P353 ?gs ;
     wdt:P703 wd:Q5 .
  MINUS { ?x wdt:P354 ?hgnc_id . }
  MINUS { ?x wdt:P703 wd:Q83310 . }
}
# QC check: list items whose P671 value (bound to ?mgi) is not a well-formed
# MGI identifier.
#
# A well-formed ID is the upper-case prefix "MGI:" followed by digits only,
# e.g. "MGI:87853". The pattern is anchored at both ends and is
# case-sensitive, so values such as "MGI:1abc" or "mgi:87853" are reported as
# malformed as well.
# NOTE(review): P671 is presumably "Mouse Genome Informatics ID" -- confirm.
SELECT ?mg ?mgi
WHERE {
  ?mg wdt:P671 ?mgi .
  FILTER (!REGEX(?mgi, "^MGI:[0-9]+$"))
}
# List items whose P486 value (bound to ?t) starts with 'M', together with the
# item's English label and, when present, its P652 value (bound to ?unii).
# NOTE(review): P486 is presumably the MeSH ID and P652 presumably UNII --
# confirm against Wikidata.
# The wikibase: and bd: prefixes are not declared in this file; the query
# relies on the endpoint predefining them (query.wikidata.org does).
SELECT ?x ?t ?xLabel ?unii
WHERE {
  ?x wdt:P486 ?t .
  FILTER (STRSTARTS(?t, 'M'))
  # ?unii stays unbound for items without a P652 statement.
  OPTIONAL { ?x wdt:P652 ?unii . }
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}
# QC check: list items (with label) that have a P683 value (bound to ?chebi)
# but no P235 statement of any value.
# NOTE(review): P683 is presumably "ChEBI ID" and P235 presumably "InChIKey"
# -- confirm against Wikidata.
# The wikibase: and bd: prefixes are not declared in this file; the query
# relies on the endpoint predefining them (query.wikidata.org does).
SELECT ?c ?cLabel
WHERE {
  ?c wdt:P683 ?chebi .
  # Drop every item that has any P235 statement.
  MINUS { ?c wdt:P235 [] . }
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}