You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@jena.apache.org by "Shawn Smith (Jira)" <ji...@apache.org> on 2020/01/09 15:37:00 UTC
[jira] [Comment Edited] (JENA-1813) Join optimization transform
results in incorrect query results
[ https://issues.apache.org/jira/browse/JENA-1813?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17011957#comment-17011957 ]
Shawn Smith edited comment on JENA-1813 at 1/9/20 3:36 PM:
-----------------------------------------------------------
This patch appears to fix my specific query and passes all the tests. But it's not obvious to me whether it's generally correct:
{noformat}
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
index 58b1773e9b..c21a872cd2 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
@@ -205,11 +205,16 @@ public class JoinClassifier
/** Find the "effective op" - ie. the one that may be sensitive to linearization */
private static Op effectiveOp(Op op) {
- if ( op instanceof OpExt )
- op = ((OpExt)op).effectiveOp() ;
- while (safeModifier(op))
- op = ((OpModifier)op).getSubOp() ;
- return op ;
+ for (;;) {
+ if (op instanceof OpExt)
+ op = ((OpExt) op).effectiveOp();
+ else if (op instanceof OpGraph)
+ op = ((OpGraph) op).getSubOp();
+ else if (safeModifier(op))
+ op = ((OpModifier) op).getSubOp();
+ else
+ return op;
+ }
}
/** Helper - test for "safe" modifiers */
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
index 05db3910ee..aecd07ec6f 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
@@ -159,6 +159,12 @@ public class TestClassify extends BaseTest
TestClassify.classifyJ(x1, false);
}
+ // JENA-1813
+ @Test public void testClassify_Join_54() {
+ String x1 = "{ ?s ?p ?V GRAPH ?g { SELECT (?w AS ?V) { ?t ?q ?w } GROUP BY ?w } }";
+ TestClassify.classifyJ(x1, false);
+ }
+
public static void classifyJ(String pattern, boolean expected)
{
String qs1 = "PREFIX : <http://example/>\n" ;
{noformat}
was (Author: ssmith):
This patch appears to fix my specific query. But it's not obvious to me whether it's generally correct:
{noformat}
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
index 58b1773e9b..c21a872cd2 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
@@ -205,11 +205,16 @@ public class JoinClassifier
/** Find the "effective op" - ie. the one that may be sensitive to linearization */
private static Op effectiveOp(Op op) {
- if ( op instanceof OpExt )
- op = ((OpExt)op).effectiveOp() ;
- while (safeModifier(op))
- op = ((OpModifier)op).getSubOp() ;
- return op ;
+ for (;;) {
+ if (op instanceof OpExt)
+ op = ((OpExt) op).effectiveOp();
+ else if (op instanceof OpGraph)
+ op = ((OpGraph) op).getSubOp();
+ else if (safeModifier(op))
+ op = ((OpModifier) op).getSubOp();
+ else
+ return op;
+ }
}
/** Helper - test for "safe" modifiers */
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
index 05db3910ee..aecd07ec6f 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
@@ -159,6 +159,12 @@ public class TestClassify extends BaseTest
TestClassify.classifyJ(x1, false);
}
+ // JENA-1813
+ @Test public void testClassify_Join_54() {
+ String x1 = "{ ?s ?p ?V GRAPH ?g { SELECT (?w AS ?V) { ?t ?q ?w } GROUP BY ?w } }";
+ TestClassify.classifyJ(x1, false);
+ }
+
public static void classifyJ(String pattern, boolean expected)
{
String qs1 = "PREFIX : <http://example/>\n" ;
{noformat}
> Join optimization transform results in incorrect query results
> --------------------------------------------------------------
>
> Key: JENA-1813
> URL: https://issues.apache.org/jira/browse/JENA-1813
> Project: Apache Jena
> Issue Type: Bug
> Components: ARQ
> Affects Versions: Jena 3.13.1
> Reporter: Shawn Smith
> Priority: Major
>
> I think I've found a query where TransformJoinStrategy incorrectly decides that a query is linear such that a "join" operation can be replaced by a "sequence" operation. As a result, the query returns incorrect results. Disabling optimizations with "qe.getContext().set(ARQ.optimization, false)" fixes the issue.
> Here's the query:
> {noformat}
> PREFIX : <http://example.com/>
> SELECT ?a
> WHERE {
> GRAPH :graph { :s :p ?a }
> GRAPH :graph {
> SELECT (?b AS ?a)
> WHERE { :t :q ?b }
> GROUP BY ?b
> }
> }
> {noformat}
> Here's the data to test it with (two quads, as Trig):
> {noformat}
> @prefix : <http://example.com/> .
> :graph {
> :s :p "a" .
> :t :q "b" .
> }
> {noformat}
> I expected the query to return zero results because the two GRAPH clauses can't find compatible bindings for ?a. But, in practice, Jena returns ?a="a" and logs a warning:
> {noformat}
> [main] WARN BindingUtils - merge: Mismatch : "a" != "b"{noformat}
> Note the warning is actually coming from QueryIterProjectMerge.java, not BindingUtils.java. With more complicated queries and datasets, this issue can result in thousands or millions of logged warnings.
> The query plan before optimization looks like this:
> {noformat}
> (project (?a)
> (join
> (graph <http://example.com/graph>
> (bgp (triple <http://example.com/s> <http://example.com/p> ?a)))
> (graph <http://example.com/graph>
> (project (?a)
> (extend ((?a ?b))
> (group (?b)
> (bgp (triple <http://example.com/t> <http://example.com/q> ?b))))))))
> {noformat}
> Optimization replaces "join" with "sequence" which fails to detect conflicts on ?a:
> {noformat}
> (project (?a)
> (sequence
> (graph <http://example.com/graph>
> (bgp (triple <http://example.com/s> <http://example.com/p> ?a)))
> (graph <http://example.com/graph>
> (project (?a)
> (extend ((?a ?/b))
> (group (?/b)
> (bgp (triple <http://example.com/t> <http://example.com/q> ?/b))))))))
> {noformat}
> For convenience, here's Java code that reproduces the bug:
> {noformat}
> import org.apache.jena.query.ARQ;
> import org.apache.jena.query.Dataset;
> import org.apache.jena.query.DatasetFactory;
> import org.apache.jena.query.QueryExecution;
> import org.apache.jena.query.QueryExecutionFactory;
> import org.apache.jena.query.ResultSet;
> import org.apache.jena.riot.Lang;
> import org.apache.jena.riot.RDFParser;
> import org.junit.Test;
> public class QueryTest {
> @Test
> public void testGraphQuery() {
> String query = "" +
> "PREFIX : <http://example.com/>\n" +
> "SELECT ?a\n" +
> "WHERE {\n" +
> " GRAPH :graph { :s :p ?a }\n" +
> " GRAPH :graph {\n" +
> " SELECT (?b AS ?a)\n" +
> " WHERE { :t :q ?b }\n" +
> " GROUP BY ?b\n" +
> " }\n" +
> "}\n";
> String data = "" +
> "@prefix : <http://example.com/> .\n" +
> ":graph {\n" +
> " :s :p \"a\" .\n" +
> " :t :q \"b\" .\n" +
> "}\n";
> Dataset ds = DatasetFactory.create();
> RDFParser.fromString(data).lang(Lang.TRIG).parse(ds);
> try (QueryExecution qe = QueryExecutionFactory.create(query, ds)) {
> qe.getContext().set(ARQ.optimization, true); // flipping this to false fixes the test
> ResultSet rs = qe.execSelect();
> if (rs.hasNext()) {
> System.out.println(rs.nextBinding());
> throw new AssertionError("Result set should be empty");
> }
> }
> }
> }
> {noformat}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)