- 
                Notifications
    
You must be signed in to change notification settings  - Fork 666
 
Description
Version
5.5.0
What happened?
Edit: found related issue #3044 and PR #3047
issue description
Some queries ignore the arq:queryTimeout value and run until completion.
This can cause compounding performance problems if multiple such queries are run.
This has been an issue for us because we have some large datasets (300 GB+) where these erroneous
queries can take hours to complete. If a few of them are running at the same time, the
CPUs are consumed by these tasks for multiple hours, making the system unresponsive or
seriously degrading performance during this time.
I've done my best to narrow down the simplest forms of the queries that cause the
problem, but I suspect there are more cases that I have yet to identify.
Reproducing it
Below is a server I set up specifically to test this issue, but I have found the same problem
on a few machines of different sizes and across datasets of 300 GB, 5 GB, and 300 MB.
I have also tested using podman and docker with a dockerized jena-fuseki-server.
And I have tested without the text index and the issue is still there.
VM
Azure VM
Linux (rhel 9.4)
Standard D8ls v6 (8 vcpus, 16 GiB memory)
128Gb SSD (5000 Max IOPS) with xfs filesystem mounted at /etc/fuseki
fuseki
jena-fuseki-server 5.5.0
run as a systemd service using this unit file
Java 21 openjdk
nginx as reverse_proxy
data
300 MB of N-Triples data
#!/bin/bash
# Download the olympics N-Triples archive and unpack it into /etc/fuseki/rdf.
# adapted from https://github.com/qlever-dev/qlever-control/blob/main/src/qlever/Qleverfiles/Qleverfile.freebase
# NOTE(review): the link above names the freebase Qleverfile, but the data fetched
# here is the olympics dataset — confirm which Qleverfile this was adapted from.
# results in a dataset of ~ 300mb
# Remove the existing olympics.nt (presumably left over from a previous run).
rm /etc/fuseki/rdf/olympics.nt
# Fetch the zipped dump; -nc is wget's no-clobber flag, -O sets the output path.
wget -nc "https://github.com/wallscope/olympics-rdf/raw/master/data/olympics-nt-nodup.zip" -O /etc/fuseki/rdf/olympics.zip
# Extract quietly (-q), overwriting existing files without prompting (-o).
unzip -qo /etc/fuseki/rdf/olympics.zip -d /etc/fuseki/rdf
# The archive is no longer needed once it has been extracted.
rm /etc/fuseki/rdf/olympics.zip
loading
tdb2.tdbloader and jena.textindexer were used to create a text-indexed TDB2 dataset,
specifically via this command:
#!/bin/bash
# Rebuild the database under /etc/fuseki/databases from scratch by running the
# kurrawong/tdb2-generation container image with podman.
# Start clean: delete any existing database directory.
rm -rf /etc/fuseki/databases/ds
# Volume mounts (host -> container); the ":z" suffix asks podman to relabel the
# volume with a shared SELinux label:
#   /etc/fuseki/rdf                  -> /rdf
#   /etc/fuseki/databases            -> /etc/fuseki/databases
#   /etc/fuseki/configuration/ds.ttl -> /config.ttl
# TDB2_MODE and TEXT are environment variables interpreted by the image;
# presumably they select the parallel loader and enable text indexing — TODO confirm
# against the image's documentation.
# --name tdb names the container, --rm removes it on exit, and `time` reports
# how long the whole run takes.
time podman run \
  -v "/etc/fuseki/rdf:/rdf:z" \
  -v "/etc/fuseki/databases:/etc/fuseki/databases:z" \
  -v "/etc/fuseki/configuration/ds.ttl:/config.ttl:z" \
  -e "TDB2_MODE=parallel" \
  -e "TEXT=true" \
  --name tdb \
  --rm \
  "ghcr.io/kurrawong/tdb2-generation:master"
assembler
# Fuseki assembler configuration: one service ("ds") exposing a Lucene
# text-indexed TDB2 dataset, with a query timeout set on the TDB2 dataset.
PREFIX :        <#>
PREFIX fuseki:  <http://jena.apache.org/fuseki#>
PREFIX rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs:    <http://www.w3.org/2000/01/rdf-schema#>
PREFIX ja:      <http://jena.hpl.hp.com/2005/11/Assembler#>
PREFIX text:    <http://jena.apache.org/text#>
PREFIX tdb2:    <http://jena.apache.org/2016/tdb#>
# The server declares a single service, :service.
[] rdf:type fuseki:Server ;
   fuseki:services (
     :service
   ) .
# Service "ds": two query endpoints ("sparql", "query"), plus update,
# read-only GSP ("get"), read-write GSP ("data") and patch endpoints,
# all backed by :text_dataset.
:service rdf:type fuseki:Service ;
    fuseki:name "ds" ;
    fuseki:endpoint [
        fuseki:operation fuseki:query ;
        fuseki:name "sparql" ;
    ] ;
    fuseki:endpoint [
        fuseki:operation fuseki:query ;
        fuseki:name "query"
    ] ;
    fuseki:endpoint [
        fuseki:operation fuseki:update ;
        fuseki:name "update"
    ] ;
    fuseki:endpoint [
        fuseki:operation fuseki:gsp-r ;
        fuseki:name "get"
    ] ;
    fuseki:endpoint [
        fuseki:operation fuseki:gsp-rw ;
        fuseki:name "data"
    ] ;
    fuseki:endpoint [
        fuseki:operation fuseki:patch ;
        fuseki:name "patch"
    ] ;
    fuseki:dataset :text_dataset ;
    .
# Text dataset: pairs the TDB2 dataset with the Lucene index.
:text_dataset rdf:type     text:TextDataset ;
    text:dataset   :dataset_tdb2 ;
    text:index     :indexLucene ;
    .
# Lucene index stored inside the database directory.
:indexLucene a text:TextIndexLucene ;
    text:directory "/etc/fuseki/databases/ds/lucene" ;
    text:entityMap :entMap ;
    .
# Entity map: only rdfs:label is indexed, in field "rdfs-label", which is also
# the default field. <#entMap> is the same IRI as :entMap because the empty
# prefix is declared as <#>.
<#entMap> a text:EntityMap ;
    text:entityField      "uri" ;
    text:defaultField     "rdfs-label" ;
    text:uidField         "uid" ;
    text:map (
         [ text:field "rdfs-label" ; text:predicate rdfs:label ]
         )
    .
# Underlying TDB2 storage. The context entry sets arq:queryTimeout to "1000",
# i.e. the 1-second timeout that the queries in this report are expected to hit.
:dataset_tdb2 rdf:type  tdb2:DatasetTDB ;
    tdb2:location "/etc/fuseki/databases/ds" ;
    ja:context [ ja:cxtName "arq:queryTimeout" ; ja:cxtValue "1000" ] ;
    .
queries
# Query 1: a text:query for a string that is not expected to match anything,
# placed between two identical ?s ?p ?o patterns.
# Observed: runs for a very long time and eventually returns 200; the
# 1-second timeout is ignored.
PREFIX text: <http://jena.apache.org/text#>
select *
where {
  ?s ?p ?o .
  ?s text:query "totallynotfindingthisstring" .
  ?s ?p ?o .
}
limit 1
# Query 2: plain scan of all triples.
# Observed: times out after 1 second as expected, unless it is run after the
# text query above, in which case it runs until finished.
SELECT *
WHERE {
  ?s ?p ?o
}
# Query 3: a LIMIT 1 subquery placed between two ?s ?p ?o patterns.
# Observed: runs for about 6 seconds and then causes a 503; the 1-second
# timeout is ignored.
SELECT *
WHERE {
  ?s ?p ?o
  {
    SELECT *
    WHERE {
      ?s ?p ?o
    }
    limit 1
  }
  ?s ?p ?o
}
limit 1
Relevant output and stacktrace
Are you interested in making a pull request?
None