You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2018/03/14 15:06:40 UTC

[3/7] jena git commit: JENA-1507: No rows when no match if GROUP BY used

JENA-1507: No rows when no match if GROUP BY used


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/8a64ffee
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/8a64ffee
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/8a64ffee

Branch: refs/heads/master
Commit: 8a64ffeef483df37bcbd313ada28104a945d2027
Parents: 459a2b8
Author: Andy Seaborne <an...@apache.org>
Authored: Tue Mar 13 15:21:29 2018 +0000
Committer: Andy Seaborne <an...@apache.org>
Committed: Tue Mar 13 15:21:29 2018 +0000

----------------------------------------------------------------------
 .../sparql/engine/iterator/QueryIterGroup.java  | 85 +++++++++-----------
 .../jena/sparql/expr/TestCustomAggregates.java  | 16 +++-
 .../sparql/expr/TestStatisticsAggregates.java   | 52 +++++++++---
 jena-arq/testing/ARQ/GroupBy/count-11.srx       |  5 --
 jena-arq/testing/ARQ/GroupBy/count-13.srx       |  5 --
 5 files changed, 94 insertions(+), 69 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/8a64ffee/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterGroup.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterGroup.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterGroup.java
index 5edbec3..fb884ef 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterGroup.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterGroup.java
@@ -26,8 +26,8 @@ import java.util.List ;
 import org.apache.jena.atlas.iterator.Iter ;
 import org.apache.jena.atlas.iterator.IteratorDelayedInitialization ;
 import org.apache.jena.atlas.lib.Pair ;
-import org.apache.jena.ext.com.google.common.collect.HashMultimap;
 import org.apache.jena.ext.com.google.common.collect.Multimap;
+import org.apache.jena.ext.com.google.common.collect.MultimapBuilder;
 import org.apache.jena.graph.Node ;
 import org.apache.jena.sparql.core.Var ;
 import org.apache.jena.sparql.core.VarExprList ;
@@ -67,11 +67,6 @@ public class QueryIterGroup extends QueryIterPlainWrapper
         super.closeIterator();
     }
 	
-    // Phase 1 : Consume the input iterator, assigning groups (keys)
-    //           and push rows through the aggregator function. 
-    
-    // Phase 2 : Go over the group bindings and assign the value of each aggregation.
-	
 	private static Pair<Var, Accumulator> placeholder = Pair.create((Var)null, (Accumulator)null) ; 
     
     private static Iterator<Binding> calc(final QueryIterator iter, 
@@ -82,16 +77,42 @@ public class QueryIterGroup extends QueryIterPlainWrapper
             @Override
             protected Iterator<Binding> initializeIterator() {
 
-                boolean noAggregators = (aggregators == null || aggregators.isEmpty());
-
-                // Phase 1 : assign bindings to buckets by key and pump through the aggregators.
-                Multimap<Binding, Pair<Var, Accumulator>> accumulators = HashMultimap.create();
+                boolean hasAggregators = ( aggregators != null && ! aggregators.isEmpty() );
+                boolean hasGroupBy = ! groupVarExpr.isEmpty();
+                boolean noInput = ! iter.hasNext();
+
+                // Case: No input.
+                // 1/ GROUP BY - no rows.
+                // 2/ No GROUP BY, e.g. COUNT=0, the results is one row always and not handled here.
+                if ( noInput ) {
+                    if ( hasGroupBy )
+                        // GROUP        
+                        return Iter.nullIterator() ;
+                    if ( ! hasAggregators ) {
+                        // No GROUP BY, no aggregators. One result row of no colums.
+                        return Iter.singleton(BindingFactory.binding());
+                    }
+                    // No GROUP BY, has aggregators. Insert default values.
+                    BindingMap binding = BindingFactory.create();
+                    for ( ExprAggregator agg : aggregators ) {
+                        Node value = agg.getAggregator().getValueEmpty();
+                        if ( value == null )
+                            continue;
+                        Var v = agg.getVar();
+                        binding.add(v, value);
+                    }
+                    return Iter.singleton(binding);
+                }
+                
+                // Case: there is input.
+                // Phase 1 : Create keys and aggreators per key, and pump bindings through the aggregators.
+                Multimap<Binding, Pair<Var, Accumulator>> accumulators = MultimapBuilder.hashKeys().arrayListValues().build();
 
                 while (iter.hasNext()) {
                     Binding b = iter.nextBinding();
                     Binding key = genKey(groupVarExpr, b, execCxt);
 
-                    if ( noAggregators ) {
+                    if ( !hasAggregators ) {
                         // Put in a dummy to remember the input.
                         accumulators.put(key, placeholder);
                         continue;
@@ -111,57 +132,25 @@ public class QueryIterGroup extends QueryIterPlainWrapper
                         pair.getRight().accumulate(b, execCxt);
                 }
 
-                // Phase 2 : Empty input
-                // has as iter.hasNext false at start.
-
-                // If there are no binding from the input stage, two things can happen.
-                // If there are no aggregators, there are no groups.
-                // If there are aggregators, then they may have a default value.
-
-                if ( accumulators.isEmpty() ) {
-                    if ( noAggregators ) {
-                        // No rows to group, no aggregators.
-                        // ==> No result rows.
-                        return Iter.nullIterator();
-                    }
-
-                    BindingMap binding = BindingFactory.create();
-
-                    for ( ExprAggregator agg : aggregators ) {
-                        Var v = agg.getVar();
-                        Node value = agg.getAggregator().getValueEmpty();
-                        if ( value != null ) {
-                            binding.add(v, value);
-                        }
-                    }
-
-                    if ( binding == null )
-                        // This does not happen if there are any aggregators.
-                        return Iter.nullIterator();
-                    // cast to get the static type inference to work.
-                    return Iter.singletonIter((Binding)binding);
-                }
-
                 // Phase 2 : There was input and so there are some groups.
                 // For each bucket, get binding, add aggregator values to the binding.
                 // We used AccNull so there are always accumulators.
 
-                if ( noAggregators )
+                if ( !hasAggregators )
                     // We used placeholder so there are always the key.
                     return accumulators.keySet().iterator();
 
                 List<Binding> results = new ArrayList<>();
-
                 for ( Binding k : accumulators.keySet() ) {
                     Collection<Pair<Var, Accumulator>> accs = accumulators.get(k);
                     BindingMap b = BindingFactory.create(k);
 
                     for ( Pair<Var, Accumulator> pair : accs ) {
-                        Var v = pair.getLeft();
                         NodeValue value = pair.getRight().getValue();
-                        Node n = (value == null) ? null : value.asNode();
-                        if ( v == null || n == null ) {} else
-                            b.add(v, n);
+                        if ( value == null )
+                            continue;
+                        Var v = pair.getLeft();
+                        b.add(v, value.asNode());
                     }
                     results.add(b);
                 }

http://git-wip-us.apache.org/repos/asf/jena/blob/8a64ffee/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestCustomAggregates.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestCustomAggregates.java b/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestCustomAggregates.java
index 47b0216..e9c2b83 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestCustomAggregates.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestCustomAggregates.java
@@ -135,9 +135,10 @@ public class TestCustomAggregates extends BaseTest {
     }
 
     @Test public void customAgg_21() {
+        // No GROUP BY, no match => default value
         Graph g = SSE.parseGraph("(graph (:s :p :o) (:s :p 1))") ;
         Model m = ModelFactory.createModelForGraph(g) ;
-        String qs = "SELECT (<"+aggIRI+">(?o) AS ?x) {?s ?p ?o FILTER (false) } GROUP BY ?s" ;
+        String qs = "SELECT (<"+aggIRI+">(?o) AS ?x) {?s ?p ?o FILTER (false) }" ;
         Query q = QueryFactory.create(qs, Syntax.syntaxARQ) ;
         try (QueryExecution qExec = QueryExecutionFactory.create(q, m) ) {
             ResultSet rs = qExec.execSelect() ;
@@ -149,6 +150,19 @@ public class TestCustomAggregates extends BaseTest {
     }
     
     @Test public void customAgg_22() {
+        // GROUP BY, no match +. no rows.
+        Graph g = SSE.parseGraph("(graph (:s :p :o) (:s :p 1))") ;
+        Model m = ModelFactory.createModelForGraph(g) ;
+        String qs = "SELECT (<"+aggIRI+">(?o) AS ?x) {?s ?p ?o FILTER (false) } GROUP BY ?s" ;
+        
+        Query q = QueryFactory.create(qs, Syntax.syntaxARQ) ;
+        try (QueryExecution qExec = QueryExecutionFactory.create(q, m) ) {
+            ResultSet rs = qExec.execSelect() ;
+            assertFalse(rs.hasNext());
+        }
+    }
+    
+    @Test public void customAgg_23() {
         String qs = "SELECT (<"+aggIRI+">(?o) AS ?x) {?s ?p ?o }" ;
         Query q = QueryFactory.create(qs, Syntax.syntaxARQ) ;
         Op op = Algebra.compile(q) ;

http://git-wip-us.apache.org/repos/asf/jena/blob/8a64ffee/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestStatisticsAggregates.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestStatisticsAggregates.java b/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestStatisticsAggregates.java
index 829fccf..a65f197 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestStatisticsAggregates.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestStatisticsAggregates.java
@@ -243,17 +243,17 @@ public class TestStatisticsAggregates {
     
     // Empty -> error 
     @Test public void agg_stat_stdev_empty() {
-        testErr("agg:stdev(?x)", dsEmpty, syntaxSPARQL_11) ; 
+        testEmpty("agg:stdev(?x)", dsEmpty, syntaxSPARQL_11) ; 
     }
     
     // Empty -> error 
     @Test public void agg_stat_stdev_samp_empty() {
-        testErr("agg:stdev_samp(?x)", dsEmpty, syntaxSPARQL_11) ; 
+        testEmpty("agg:stdev_samp(?x)", dsEmpty, syntaxSPARQL_11) ; 
     }
     
     // Empty -> error 
     @Test public void agg_stat_stdev_pop_empty() {
-        testErr("agg:stdev_pop(?x)", dsEmpty, syntaxSPARQL_11) ; 
+        testEmpty("agg:stdev_pop(?x)", dsEmpty, syntaxSPARQL_11) ; 
     }
     
     // Sample of one -> error
@@ -273,26 +273,24 @@ public class TestStatisticsAggregates {
 
     // Population of one -> 0e0
     @Test public void agg_stat_stdev_pop_size_one() {
-        Query query = build("agg:stdev_pop(?x)", syntaxSPARQL_11) ; 
+        Query query = buildGroupBy("agg:stdev_pop(?x)", syntaxSPARQL_11) ; 
         test(query, 0e0, ds1) ; 
     }
     
     // Population of one -> 0e0
     @Test public void agg_stat_var_pop_size_one() {
-        Query query = build("agg:var_pop(?x)", syntaxSPARQL_11) ; 
+        Query query = buildGroupBy("agg:var_pop(?x)", syntaxSPARQL_11) ; 
         test(query, 0e0, ds1) ; 
     }
     
     // By keyword
     
-    
-    
     private static void test(String qsAgg, double expected, Syntax syntax) {
         test(qsAgg, expected, syntax, ds) ;
     }
 
     private static void test(String qsAgg, double expected, Syntax syntax,  DatasetGraph dsg) {
-        Query query = build(qsAgg, syntax) ; 
+        Query query = buildGroupBy(qsAgg, syntax) ; 
         test(query, expected, dsg) ;
     }
 
@@ -304,15 +302,40 @@ public class TestStatisticsAggregates {
         }
     }
 
-    private static Query build(String qsAgg, Syntax syntax) {
+    private static Query buildGroupBy(String qsAgg, Syntax syntax) {
         String NL = "\n" ;
         String qs = PRE+NL+"SELECT ("+qsAgg+NL+"AS ?X) WHERE {?s ?p ?x} GROUP BY ?s" ;
         Query query = QueryFactory.create(qs, syntax) ;
         return query ;
     }
     
+    private static Query buildNoGroupBy(String qsAgg, Syntax syntax) {
+        String NL = "\n" ;
+        String qs = PRE+NL+"SELECT ("+qsAgg+NL+"AS ?X) WHERE {?s ?p ?x}" ;
+        Query query = QueryFactory.create(qs, syntax) ;
+        return query ;
+    }
+    
+    // Error in calculation (e.g. 2+ needed)
     private void testErr(String qsAgg, DatasetGraph ds, Syntax syntax) {
-        Query query = build(qsAgg, syntax) ;
+        Query query = buildGroupBy(qsAgg, syntax) ;
+        try ( QueryExecution qExec = QueryExecutionFactory.create(query, DatasetFactory.wrap(ds)) ) {
+            ResultSet rs = qExec.execSelect() ;
+            assertTrue(rs.getResultVars().contains("X")) ;
+            Binding b = rs.nextBinding() ;
+            assertFalse(b.contains(Var.alloc("X"))) ;
+        }
+    }
+    
+    // Behaviour on empty
+    private void testEmpty(String qsAgg, DatasetGraph ds, Syntax syntax) {
+        testEmptyNoGroupBy(qsAgg, ds, syntax);
+        testEmptyGroupBy(qsAgg, ds, syntax);
+    }
+    
+    // Behaviour on empty - aggregate and no GROUP BY
+    private void testEmptyNoGroupBy(String qsAgg, DatasetGraph ds, Syntax syntax) {
+        Query query = buildNoGroupBy(qsAgg, syntax) ;
         try ( QueryExecution qExec = QueryExecutionFactory.create(query, DatasetFactory.wrap(ds)) ) {
             ResultSet rs = qExec.execSelect() ;
             assertTrue(rs.hasNext()) ;
@@ -321,4 +344,13 @@ public class TestStatisticsAggregates {
             assertFalse(b.contains(Var.alloc("X"))) ;
         }
     }
+    
+    // Behaviour on empty - GROUP BY present
+    private void testEmptyGroupBy(String qsAgg, DatasetGraph ds, Syntax syntax) {
+        Query query = buildGroupBy(qsAgg, syntax) ;
+        try ( QueryExecution qExec = QueryExecutionFactory.create(query, DatasetFactory.wrap(ds)) ) {
+            ResultSet rs = qExec.execSelect() ;
+            assertFalse(rs.hasNext()) ;
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/8a64ffee/jena-arq/testing/ARQ/GroupBy/count-11.srx
----------------------------------------------------------------------
diff --git a/jena-arq/testing/ARQ/GroupBy/count-11.srx b/jena-arq/testing/ARQ/GroupBy/count-11.srx
index 71870b1..724db26 100644
--- a/jena-arq/testing/ARQ/GroupBy/count-11.srx
+++ b/jena-arq/testing/ARQ/GroupBy/count-11.srx
@@ -4,10 +4,5 @@
     <variable name="c"/>
   </head>
   <results>
-    <result>
-      <binding name="c">
-        <literal datatype="http://www.w3.org/2001/XMLSchema#integer">0</literal>
-      </binding>
-    </result>
   </results>
 </sparql>

http://git-wip-us.apache.org/repos/asf/jena/blob/8a64ffee/jena-arq/testing/ARQ/GroupBy/count-13.srx
----------------------------------------------------------------------
diff --git a/jena-arq/testing/ARQ/GroupBy/count-13.srx b/jena-arq/testing/ARQ/GroupBy/count-13.srx
index 71870b1..724db26 100644
--- a/jena-arq/testing/ARQ/GroupBy/count-13.srx
+++ b/jena-arq/testing/ARQ/GroupBy/count-13.srx
@@ -4,10 +4,5 @@
     <variable name="c"/>
   </head>
   <results>
-    <result>
-      <binding name="c">
-        <literal datatype="http://www.w3.org/2001/XMLSchema#integer">0</literal>
-      </binding>
-    </result>
   </results>
 </sparql>