You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@jena.apache.org by "Eetu Mäkelä (JIRA)" <ji...@apache.org> on 2016/05/31 14:02:12 UTC
[jira] [Comment Edited] (JENA-1187) Wrong results/performance
regression when using BIND and graph pattern groups
[ https://issues.apache.org/jira/browse/JENA-1187?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15307763#comment-15307763 ]
Eetu Mäkelä edited comment on JENA-1187 at 5/31/16 2:01 PM:
------------------------------------------------------------
Any data containing rdf:type statements should reproduce the error. The query presented above is just a distilled-down sample.
The actual queries are much more complex, with multiple filters, binds and so on. Here's an example (against http://ldf.fi/ancore/sparql, which contains some 4 million triples):
{code}
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX rdve3: <http://rdvocab.info/ElementsGr3/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX oac: <http://www.openannotation.org/ns/>
PREFIX text: <http://jena.apache.org/text#>
SELECT ?id (?mngram AS ?label) ?ngram ?source {
{
VALUES ?ngram {
"lots" "of" "ngrams" "lots of ngrams"
}
FILTER(STRLEN(?ngram)>2 && REGEX(?ngram,"^\\w\\. "))
BIND(REPLACE(?ngram,"#\\d\\b","") AS ?ngram2)
BIND(REPLACE(?ngram2,"^(\\w)\\. ","$1* ") AS ?ngram3)
BIND(REPLACE(?ngram3,"\\b(\\w)","+$1") AS ?q)
?id text:query (?q 3) .
?id rdfs:label ?mngram .
FILTER(STRSTARTS(?mngram,SUBSTR(?ngram2,1,1)))
FILTER(CONTAINS(SUBSTR(?mngram,2),SUBSTR(?ngram2,4)))
BIND(4 AS ?source)
} UNION {
VALUES ?ngram {
"lots" "of" "ngrams" "lots of ngrams"
}
FILTER(STRLEN(?ngram)>2)
BIND(REPLACE(?ngram,"#\\d\\b","") AS ?ngram2)
BIND(REPLACE(?ngram2,"\\W$","") AS ?ngram3)
{
BIND(?ngram3 AS ?mngram)
?c dct:title|skos:prefLabel|rdfs:label|skos:altLabel|foaf:name ?mngram .
BIND(1 AS ?source)
} UNION {
BIND(STRLANG(CONCAT(UCASE(SUBSTR(?ngram3,1,1)),SUBSTR(?ngram3,2)),"la") AS ?mngram)
?c rdfs:label ?mngram .
BIND(3 AS ?source)
}
?c (owl:sameAs|^owl:sameAs|skos:exactMatch|^skos:exactMatch|foaf:primaryTopicOf|^foaf:primaryTopicOf)* ?id .
FILTER NOT EXISTS {
?id a oac:Annotation .
}
} UNION {
SERVICE <http://ldf.fi/dbpedia/sparql> {
SELECT ?mngram ?ngram ?id {
VALUES ?ngram {
"lots" "of" "ngrams" "lots of ngrams"
}
FILTER(STRLEN(?ngram)>2 && UCASE(SUBSTR(?ngram,1,1))=SUBSTR(?ngram,1,1))
BIND(REPLACE(?ngram,"#\\d\\b","") AS ?ngram2)
BIND(REPLACE(?ngram2,"\\W$","") AS ?ngram3)
BIND(STRLANG(?ngram3,"en") AS ?mngram)
?c rdfs:label ?mngram .
FILTER(STRSTARTS(STR(?c),"http://dbpedia.org/resource/"))
FILTER(!STRSTARTS(STR(?c),"http://dbpedia.org/resource/Category:"))
FILTER EXISTS { ?c a ?type }
FILTER NOT EXISTS {
?c dbo:wikiPageDisambiguates ?other .
}
FILTER NOT EXISTS {
?c a dbo:Album .
}
FILTER NOT EXISTS {
?c a dbo:MusicalWork .
}
FILTER NOT EXISTS {
?c a dbo:Year .
}
FILTER NOT EXISTS {
?c a dbo:Company .
}
{
?c dbo:wikiPageRedirects ?id .
} UNION {
FILTER NOT EXISTS {
?c dbo:wikiPageRedirects ?other .
}
BIND(?c as ?id)
}
}
}
BIND(2 AS ?source)
}
}
{code}
Looking at the above query, there is a further issue: while in theory I could, in most cases, just push the VALUES, filters and so on down into the UNIONs, I sometimes push down hundreds of ngrams at a time, so the queries would quickly grow very large in sheer character count.
was (Author: jiemakel):
Any data with rdf:type statements should produce the error. The query presented above is just a distilled down sample.
The actual queries are much more complex, with multiple filters, binds and so on. Here's an example (against http://ldf.fi/ancore/sparql, which contains some 4 million triples):
{{code}}
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX rdve3: <http://rdvocab.info/ElementsGr3/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX oac: <http://www.openannotation.org/ns/>
PREFIX text: <http://jena.apache.org/text#>
SELECT ?id (?mngram AS ?label) ?ngram ?source {
{
VALUES ?ngram {
"lots" "of" "ngrams" "lots of ngrams"
}
FILTER(STRLEN(?ngram)>2 && REGEX(?ngram,"^\\w\\. "))
BIND(REPLACE(?ngram,"#\\d\\b","") AS ?ngram2)
BIND(REPLACE(?ngram2,"^(\\w)\\. ","$1* ") AS ?ngram3)
BIND(REPLACE(?ngram3,"\\b(\\w)","+$1") AS ?q)
?id text:query (?q 3) .
?id rdfs:label ?mngram .
FILTER(STRSTARTS(?mngram,SUBSTR(?ngram2,1,1)))
FILTER(CONTAINS(SUBSTR(?mngram,2),SUBSTR(?ngram2,4)))
BIND(4 AS ?source)
} UNION {
VALUES ?ngram {
"lots" "of" "ngrams" "lots of ngrams"
}
FILTER(STRLEN(?ngram)>2)
BIND(REPLACE(?ngram,"#\\d\\b","") AS ?ngram2)
BIND(REPLACE(?ngram2,"\\W$","") AS ?ngram3)
{
BIND(?ngram3 AS ?mngram)
?c dct:title|skos:prefLabel|rdfs:label|skos:altLabel|foaf:name ?mngram .
BIND(1 AS ?source)
} UNION {
BIND(STRLANG(CONCAT(UCASE(SUBSTR(?ngram3,1,1)),SUBSTR(?ngram3,2)),"la") AS ?mngram)
?c rdfs:label ?mngram .
BIND(3 AS ?source)
}
?c (owl:sameAs|^owl:sameAs|skos:exactMatch|^skos:exactMatch|foaf:primaryTopicOf|^foaf:primaryTopicOf)* ?id .
FILTER NOT EXISTS {
?id a oac:Annotation .
}
} UNION {
SERVICE <http://ldf.fi/dbpedia/sparql> {
SELECT ?mngram ?ngram ?id {
VALUES ?ngram {
"lots" "of" "ngrams" "lots of ngrams"
}
FILTER(STRLEN(?ngram)>2 && UCASE(SUBSTR(?ngram,1,1))=SUBSTR(?ngram,1,1))
BIND(REPLACE(?ngram,"#\\d\\b","") AS ?ngram2)
BIND(REPLACE(?ngram2,"\\W$","") AS ?ngram3)
BIND(STRLANG(?ngram3,"en") AS ?mngram)
?c rdfs:label ?mngram .
FILTER(STRSTARTS(STR(?c),"http://dbpedia.org/resource/"))
FILTER(!STRSTARTS(STR(?c),"http://dbpedia.org/resource/Category:"))
FILTER EXISTS { ?c a ?type }
FILTER NOT EXISTS {
?c dbo:wikiPageDisambiguates ?other .
}
FILTER NOT EXISTS {
?c a dbo:Album .
}
FILTER NOT EXISTS {
?c a dbo:MusicalWork .
}
FILTER NOT EXISTS {
?c a dbo:Year .
}
FILTER NOT EXISTS {
?c a dbo:Company .
}
{
?c dbo:wikiPageRedirects ?id .
} UNION {
FILTER NOT EXISTS {
?c dbo:wikiPageRedirects ?other .
}
BIND(?c as ?id)
}
}
}
BIND(2 AS ?source)
}
}
{{code}}
Looking at the above query, the issue is also that while in theory I could just push the VALUES, filters and so on down into the UNIONs in most cases, because sometimes I push down hundreds of ngrams at a time, the queries would start to grow in pure characters quite quickly.
> Wrong results/performance regression when using BIND and graph pattern groups
> -----------------------------------------------------------------------------
>
> Key: JENA-1187
> URL: https://issues.apache.org/jira/browse/JENA-1187
> Project: Apache Jena
> Issue Type: Bug
> Components: ARQ
> Affects Versions: Jena 3.1.0, Fuseki 2.4.0
> Reporter: Eetu Mäkelä
>
> I've been using SPARQL queries with BIND and/or VALUES clauses that precede UNION blocks. These used to work efficiently (at least in the Jena 2 era), with the bound value also being bound in the subpatterns, but they no longer do. In addition, they sometimes produce nonsensical results. For example, the query below returns {{rdf:type}} statements from the dataset without any regard to ?cl:
> {code}
> SELECT * {
> {
> BIND("nonexistant" AS ?cl)
> {
> BIND(?cl AS ?cl2)
> ?c a ?cl2 .
> } UNION {
> BIND(?cl AS ?cl2)
> ?c a ?cl2 .
> }
> }
> }
> LIMIT 10
> {code}
> while if you change _just one_ of the subpatterns in the union to directly refer to ?cl, it returns an empty result set:
> {code}
> SELECT * {
> {
> BIND("nonexistant" AS ?cl)
> {
> BIND(?cl AS ?cl2)
> ?c a ?cl .
> } UNION {
> BIND(?cl AS ?cl2)
> ?c a ?cl2 .
> }
> }
> }
> LIMIT 10
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)