You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by ca...@apache.org on 2011/09/02 17:16:03 UTC

svn commit: r1164576 - in /incubator/jena/Jena2/ARQ/trunk: ./ src-test/com/hp/hpl/jena/sparql/algebra/optimize/ src/com/hp/hpl/jena/sparql/algebra/optimize/ src/com/hp/hpl/jena/sparql/engine/iterator/ src/com/hp/hpl/jena/sparql/engine/main/

Author: castagna
Date: Fri Sep  2 15:16:01 2011
New Revision: 1164576

URL: http://svn.apache.org/viewvc?rev=1164576&view=rev
Log:
JENA-108

Optimization for SELECT DISTINCT * { ... } ORDER BY ... LIMIT N to avoid a total sort, similar to JENA-89 and it interacts with JENA-90 (now the interaction is a good one).
Tests were already present (and they all pass).

Modified:
    incubator/jena/Jena2/ARQ/trunk/ChangeLog.txt
    incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
    incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java
    incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java
    incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java

Modified: incubator/jena/Jena2/ARQ/trunk/ChangeLog.txt
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/ChangeLog.txt?rev=1164576&r1=1164575&r2=1164576&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/ChangeLog.txt (original)
+++ incubator/jena/Jena2/ARQ/trunk/ChangeLog.txt Fri Sep  2 15:16:01 2011
@@ -3,6 +3,8 @@ ChangeLog for ARQ
 
 ==== ARQ 2.8.9
 
++ Add optimization for DISTINCT/ORDER BY/LIMIT N (JENA-108)
++ Add optimization for DISTINCT/ORDER BY (JENA-90)
 + Add optimization for ORDER BY/LIMIT N (JENA-89)
 + General upgrade of dependent systems (not Lucene - see LARQ module for use with Lucene 3)
 + Make CONCAT follow the SPARQL 1.1 spec properly. 

Modified: incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java?rev=1164576&r1=1164575&r2=1164576&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java Fri Sep  2 15:16:01 2011
@@ -165,12 +165,35 @@ public class TestOptimizer extends BaseT
         }
     }
 
+    @Test public void slice_order_to_topn_04()
+    {
+        assertTrue(ARQ.isTrueOrUndef(ARQ.optTopNSorting)) ;
+        String queryString = "SELECT DISTINCT * { ?s ?p ?o } ORDER BY ?p ?o LIMIT 42"  ;  
+        String opExpectedString = 
+            "(top (42 ?p ?o)\n" + 
+            "  (distinct\n" +
+            "  (bgp (triple ?s ?p ?o))))" ; 
+        check(queryString, opExpectedString) ;
+    }
+
+    @Test public void slice_order_to_topn_05()
+    {
+        assertTrue(ARQ.isTrueOrUndef(ARQ.optTopNSorting)) ;
+        String queryString = "SELECT DISTINCT * { ?s ?p ?o } ORDER BY ?p ?o LIMIT 4242"  ;  
+        String opExpectedString = 
+            "(slice _ 4242\n" + 
+            "  (reduced\n" +
+            "    (order (?p ?o)\n" +
+            "      (bgp (triple ?s ?p ?o)))))" ; 
+        check(queryString, opExpectedString) ;
+    }
+
     @Test public void distinct_to_reduced_01()
     {
         assertTrue(ARQ.isTrueOrUndef(ARQ.optDistinctToReduced)) ;
         String queryString = "SELECT DISTINCT * { ?s ?p ?o } ORDER BY ?p ?o"  ;  
         String opExpectedString = 
-            "(reduced \n" + 
+            "(reduced\n" + 
             "  (order (?p ?o)\n" +
             "    (bgp (triple ?s ?p ?o))))" ; 
         check(queryString, opExpectedString) ;
@@ -183,7 +206,7 @@ public class TestOptimizer extends BaseT
             assertTrue(ARQ.isFalse(ARQ.optDistinctToReduced)) ;
             String queryString = "SELECT DISTINCT * { ?s ?p ?o } ORDER BY ?p ?o"  ;  
             String opExpectedString = 
-                "(distinct \n" + 
+                "(distinct\n" + 
                 "  (order (?p ?o)\n" +
                 "    (bgp (triple ?s ?p ?o))))" ; 
             check(queryString, opExpectedString) ;

Modified: incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java?rev=1164576&r1=1164575&r2=1164576&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java Fri Sep  2 15:16:01 2011
@@ -18,16 +18,17 @@
 
 package com.hp.hpl.jena.sparql.algebra.optimize;
 
-import com.hp.hpl.jena.query.Query;
-import com.hp.hpl.jena.sparql.algebra.Op;
-import com.hp.hpl.jena.sparql.algebra.TransformCopy;
-import com.hp.hpl.jena.sparql.algebra.op.OpOrder;
-import com.hp.hpl.jena.sparql.algebra.op.OpSlice;
-import com.hp.hpl.jena.sparql.algebra.op.OpTopN;
+import com.hp.hpl.jena.query.Query ;
+import com.hp.hpl.jena.sparql.algebra.Op ;
+import com.hp.hpl.jena.sparql.algebra.TransformCopy ;
+import com.hp.hpl.jena.sparql.algebra.op.OpDistinct ;
+import com.hp.hpl.jena.sparql.algebra.op.OpOrder ;
+import com.hp.hpl.jena.sparql.algebra.op.OpSlice ;
+import com.hp.hpl.jena.sparql.algebra.op.OpTopN ;
 
 public class TransformTopN extends TransformCopy {
 
-	public static final int TOPN_LIMIT_THRESHOLD = 100; 
+	public static final int TOPN_LIMIT_THRESHOLD = 100 ;
 	
     @Override
 	public Op transform(OpSlice opSlice, Op subOp) { 
@@ -35,7 +36,15 @@ public class TransformTopN extends Trans
     	if ( ( ( opSlice.getStart() == 0 ) || ( opSlice.getStart() == Query.NOLIMIT ) ) && 
     	     ( opSlice.getLength() < TOPN_LIMIT_THRESHOLD ) ) {
         	if ( subOp instanceof OpOrder ) {
-        		return new OpTopN(((OpOrder) subOp).getSubOp(), (int)opSlice.getLength(), ((OpOrder) subOp).getConditions()) ;
+        	    OpOrder opOrder = (OpOrder)subOp ;
+        		return new OpTopN( opOrder.getSubOp(), (int)opSlice.getLength(), opOrder.getConditions() ) ;
+        	} else if ( subOp instanceof OpDistinct ) {
+        	    OpDistinct opDistinct = (OpDistinct)subOp ;
+        	    if ( opDistinct.getSubOp() instanceof OpOrder ) {
+        	        OpOrder opOrder = (OpOrder)opDistinct.getSubOp() ;
+        	        Op opDistinct2 = OpDistinct.create(opOrder.getSubOp()) ;
+        	        return new OpTopN( opDistinct2, (int)opSlice.getLength(), opOrder.getConditions() ) ;
+        	    }
         	}
     	}
 

Modified: incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java?rev=1164576&r1=1164575&r2=1164576&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java Fri Sep  2 15:16:01 2011
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package com.hp.hpl.jena.sparql.engine.iterator;
 
 import java.util.Arrays ;
@@ -28,18 +46,20 @@ public class QueryIterTopN extends Query
 	 * This leaves the least N in the heap.    
 	 */
     private PriorityQueue<Binding> heap ;
-    long limit ;
+    private long limit ;
+    private final boolean distinct ;
 	
-    public QueryIterTopN(QueryIterator qIter, List<SortCondition> conditions, long numItems, ExecutionContext context)
+    public QueryIterTopN(QueryIterator qIter, List<SortCondition> conditions, long numItems, boolean distinct, ExecutionContext context)
     {
-        this(qIter, new BindingComparator(conditions, context), numItems, context) ;
+        this(qIter, new BindingComparator(conditions, context), numItems, distinct, context) ;
     }
 
-    public QueryIterTopN(QueryIterator qIter, Comparator<Binding> comparator, long numItems, ExecutionContext context)
+    public QueryIterTopN(QueryIterator qIter, Comparator<Binding> comparator, long numItems, boolean distinct, ExecutionContext context)
     {
         super(null, context) ;
-        this.embeddedIterator = qIter;
-        
+        this.embeddedIterator = qIter ;
+        this.distinct = distinct ;
+
         limit = numItems ;
         if ( limit == Query.NOLIMIT )
             limit = Long.MAX_VALUE ;
@@ -77,7 +97,7 @@ public class QueryIterTopN extends Query
                 {
                     Binding binding = qIter.next() ;
                     if ( heap.size() < limit )
-                    	heap.add(binding) ;
+                    	add(binding) ;
                     else {
                         Binding currentMaxLeastN = heap.peek() ;
                         
@@ -85,7 +105,7 @@ public class QueryIterTopN extends Query
                     	{
                     	    // If binding is less than current Nth least ...
                     		heap.poll() ;     // Drop Nth least.
-                        	heap.add(binding) ;
+                        	add(binding) ;
                     	}
                     }
                 }
@@ -97,4 +117,18 @@ public class QueryIterTopN extends Query
                 return iter ;
         	}
 		};
-    }}
+    }
+
+    private void add (Binding binding) 
+    {
+        if ( distinct ) 
+        {
+            if ( !heap.contains(binding) ) heap.add(binding) ;
+        } 
+        else 
+        {
+            heap.add(binding) ;
+        }
+    }
+
+}
\ No newline at end of file

Modified: incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java?rev=1164576&r1=1164575&r2=1164576&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java Fri Sep  2 15:16:01 2011
@@ -383,8 +383,15 @@ public class OpExecutor
 
     protected QueryIterator execute(OpTopN opTop, QueryIterator input)
     { 
-        QueryIterator qIter = executeOp(opTop.getSubOp(), input) ;
-        qIter = new QueryIterTopN(qIter, opTop.getConditions(), opTop.getLimit(), execCxt) ;
+        QueryIterator qIter = null ;
+        if ( opTop.getSubOp() instanceof OpDistinct ) {
+            OpDistinct opDistinct = (OpDistinct)opTop.getSubOp() ;
+            qIter = executeOp(opDistinct.getSubOp(), input) ;
+            qIter = new QueryIterTopN(qIter, opTop.getConditions(), opTop.getLimit(), true, execCxt) ;
+        } else {
+            qIter = executeOp(opTop.getSubOp(), input) ;
+            qIter = new QueryIterTopN(qIter, opTop.getConditions(), opTop.getLimit(), false, execCxt) ;
+        }
         return qIter ;
     }