You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@jena.apache.org by "Shawn Smith (Jira)" <ji...@apache.org> on 2020/01/09 15:37:00 UTC

[jira] [Comment Edited] (JENA-1813) Join optimization transform results in incorrect query results

    [ https://issues.apache.org/jira/browse/JENA-1813?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17011957#comment-17011957 ] 

Shawn Smith edited comment on JENA-1813 at 1/9/20 3:36 PM:
-----------------------------------------------------------

This patch appears to fix my specific query and passes all the tests.  But it's not obvious to me whether it's generally correct:
{noformat}
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
index 58b1773e9b..c21a872cd2 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
@@ -205,11 +205,16 @@ public class JoinClassifier
 
     /** Find the "effective op" - ie. the one that may be sensitive to linearization */
     private static Op effectiveOp(Op op) {
-        if ( op instanceof OpExt )
-            op = ((OpExt)op).effectiveOp() ;
-        while (safeModifier(op))
-            op = ((OpModifier)op).getSubOp() ;
-        return op ;
+        for (;;) {
+            if (op instanceof OpExt)
+                op = ((OpExt) op).effectiveOp();
+            else if (op instanceof OpGraph)
+                op = ((OpGraph) op).getSubOp();
+            else if (safeModifier(op))
+                op = ((OpModifier) op).getSubOp();
+            else
+                return op;
+        }
     }
 
     /** Helper - test for "safe" modifiers */
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
index 05db3910ee..aecd07ec6f 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
@@ -159,6 +159,12 @@ public class TestClassify extends BaseTest
         TestClassify.classifyJ(x1, false);
     }
 
+    // JENA-1813
+    @Test public void testClassify_Join_54() {
+        String x1 = "{ ?s  ?p  ?V GRAPH ?g { SELECT (?w AS ?V) { ?t  ?q  ?w } GROUP BY ?w } }";
+        TestClassify.classifyJ(x1, false);
+    }
+
     public static void classifyJ(String pattern, boolean expected)
     {
         String qs1 = "PREFIX : <http://example/>\n" ;
{noformat}


was (Author: ssmith):
This patch appears to fix my specific query.  But it's not obvious to me whether it's generally correct:
{noformat}
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
index 58b1773e9b..c21a872cd2 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
@@ -205,11 +205,16 @@ public class JoinClassifier
 
     /** Find the "effective op" - ie. the one that may be sensitive to linearization */
     private static Op effectiveOp(Op op) {
-        if ( op instanceof OpExt )
-            op = ((OpExt)op).effectiveOp() ;
-        while (safeModifier(op))
-            op = ((OpModifier)op).getSubOp() ;
-        return op ;
+        for (;;) {
+            if (op instanceof OpExt)
+                op = ((OpExt) op).effectiveOp();
+            else if (op instanceof OpGraph)
+                op = ((OpGraph) op).getSubOp();
+            else if (safeModifier(op))
+                op = ((OpModifier) op).getSubOp();
+            else
+                return op;
+        }
     }
 
     /** Helper - test for "safe" modifiers */
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
index 05db3910ee..aecd07ec6f 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
@@ -159,6 +159,12 @@ public class TestClassify extends BaseTest
         TestClassify.classifyJ(x1, false);
     }
 
+    // JENA-1813
+    @Test public void testClassify_Join_54() {
+        String x1 = "{ ?s  ?p  ?V GRAPH ?g { SELECT (?w AS ?V) { ?t  ?q  ?w } GROUP BY ?w } }";
+        TestClassify.classifyJ(x1, false);
+    }
+
     public static void classifyJ(String pattern, boolean expected)
     {
         String qs1 = "PREFIX : <http://example/>\n" ;
{noformat}

> Join optimization transform results in incorrect query results
> --------------------------------------------------------------
>
>                 Key: JENA-1813
>                 URL: https://issues.apache.org/jira/browse/JENA-1813
>             Project: Apache Jena
>          Issue Type: Bug
>          Components: ARQ
>    Affects Versions: Jena 3.13.1
>            Reporter: Shawn Smith
>            Priority: Major
>
> I think I've found a query where TransformJoinStrategy incorrectly decides that a query is linear such that a "join" operation can be replaced by a "sequence" operation.  As a result, the query returns incorrect results.  Disabling optimizations with "qe.getContext().set(ARQ.optimization, false)" fixes the issue.
> Here's the query: 
> {noformat}
> PREFIX  :  <http://example.com/>
> SELECT ?a
> WHERE {
>   GRAPH :graph { :s :p ?a }
>   GRAPH :graph {
>     SELECT (?b AS ?a)
>     WHERE { :t :q ?b }
>     GROUP BY ?b
>   }
> }
> {noformat}
> Here's the data to test it with (two quads, as Trig): 
> {noformat}
> @prefix :      <http://example.com/> .
> :graph {
>     :s      :p      "a" .
>     :t      :q      "b" .
> }
> {noformat}
> I expected the query to return zero results because the two GRAPH clauses can't find compatible bindings for ?a.  But, in practice, Jena returns ?a="a" and logs a warning:
> {noformat}
> [main] WARN  BindingUtils - merge: Mismatch : "a" != "b"{noformat}
> Note the warning is actually coming from QueryIterProjectMerge.java, not BindingUtils.java.  With more complicated queries and datasets, this issue can result in thousands or millions of logged warnings.
> The query plan before optimization looks like this:
> {noformat}
> (project (?a)
>   (join
>     (graph <http://example.com/graph>
>       (bgp (triple <http://example.com/s> <http://example.com/p> ?a)))
>     (graph <http://example.com/graph>
>       (project (?a)
>         (extend ((?a ?b))
>           (group (?b)
>             (bgp (triple <http://example.com/t> <http://example.com/q> ?b))))))))
> {noformat}
> Optimization replaces "join" with "sequence" which fails to detect conflicts on ?a:
> {noformat}
> (project (?a)
>   (sequence
>     (graph <http://example.com/graph>
>       (bgp (triple <http://example.com/s> <http://example.com/p> ?a)))
>     (graph <http://example.com/graph>
>       (project (?a)
>         (extend ((?a ?/b))
>           (group (?/b)
>             (bgp (triple <http://example.com/t> <http://example.com/q> ?/b))))))))
> {noformat}
> For convenience, here's Java code that reproduces the bug:
> {noformat}
> import org.apache.jena.query.ARQ;
> import org.apache.jena.query.Dataset;
> import org.apache.jena.query.DatasetFactory;
> import org.apache.jena.query.QueryExecution;
> import org.apache.jena.query.QueryExecutionFactory;
> import org.apache.jena.query.ResultSet;
> import org.apache.jena.riot.Lang;
> import org.apache.jena.riot.RDFParser;
> import org.junit.Test;
> public class QueryTest {
>     @Test
>     public void testGraphQuery() {
>         String query = "" +
>                 "PREFIX  :  <http://example.com/>\n" +
>                 "SELECT ?a\n" +
>                 "WHERE {\n" +
>                 "  GRAPH :graph { :s :p ?a }\n" +
>                 "  GRAPH :graph {\n" +
>                 "    SELECT (?b AS ?a)\n" +
>                 "    WHERE { :t :q ?b }\n" +
>                 "    GROUP BY ?b\n" +
>                 "  }\n" +
>                 "}\n";
>         String data = "" +
>                 "@prefix :  <http://example.com/> .\n" +
>                 ":graph {\n" +
>                 "  :s  :p  \"a\" .\n" +
>                 "  :t  :q  \"b\" .\n" +
>                 "}\n";
>         Dataset ds = DatasetFactory.create();
>         RDFParser.fromString(data).lang(Lang.TRIG).parse(ds);
>         try (QueryExecution qe = QueryExecutionFactory.create(query, ds)) {
>             qe.getContext().set(ARQ.optimization, true);  // flipping this to false fixes the test
>             ResultSet rs = qe.execSelect();
>             if (rs.hasNext()) {
>                 System.out.println(rs.nextBinding());
>                 throw new AssertionError("Result set should be empty");
>             }
>         }
>     }
> }
> {noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)