You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by ca...@apache.org on 2011/09/02 17:16:03 UTC
svn commit: r1164576 - in /incubator/jena/Jena2/ARQ/trunk: ./
src-test/com/hp/hpl/jena/sparql/algebra/optimize/
src/com/hp/hpl/jena/sparql/algebra/optimize/
src/com/hp/hpl/jena/sparql/engine/iterator/
src/com/hp/hpl/jena/sparql/engine/main/
Author: castagna
Date: Fri Sep 2 15:16:01 2011
New Revision: 1164576
URL: http://svn.apache.org/viewvc?rev=1164576&view=rev
Log:
JENA-108
Optimization for SELECT DISTINCT * { ... } ORDER BY ... LIMIT N to avoid a total sort, similar to JENA-89 and it interacts with JENA-90 (now the interaction is a good one).
Tests were already present (and they all pass).
Modified:
incubator/jena/Jena2/ARQ/trunk/ChangeLog.txt
incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java
incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java
incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java
Modified: incubator/jena/Jena2/ARQ/trunk/ChangeLog.txt
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/ChangeLog.txt?rev=1164576&r1=1164575&r2=1164576&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/ChangeLog.txt (original)
+++ incubator/jena/Jena2/ARQ/trunk/ChangeLog.txt Fri Sep 2 15:16:01 2011
@@ -3,6 +3,8 @@ ChangeLog for ARQ
==== ARQ 2.8.9
++ Add optimization for DISTINCT/ORDER BY/LIMIT N (JENA-108)
++ Add optimization for DISTINCT/ORDER BY (JENA-90)
+ Add optimization for ORDER BY/LIMIT N (JENA-89)
+ General upgrade of dependent systems (not Lucene - see LARQ module for use with Lucene 3)
+ Make CONCAT follow the SPARQL 1.1 spec properly.
Modified: incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java?rev=1164576&r1=1164575&r2=1164576&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java Fri Sep 2 15:16:01 2011
@@ -165,12 +165,35 @@ public class TestOptimizer extends BaseT
}
}
+ @Test public void slice_order_to_topn_04()
+ {
+ assertTrue(ARQ.isTrueOrUndef(ARQ.optTopNSorting)) ;
+ String queryString = "SELECT DISTINCT * { ?s ?p ?o } ORDER BY ?p ?o LIMIT 42" ;
+ String opExpectedString =
+ "(top (42 ?p ?o)\n" +
+ " (distinct\n" +
+ " (bgp (triple ?s ?p ?o))))" ;
+ check(queryString, opExpectedString) ;
+ }
+
+ @Test public void slice_order_to_topn_05()
+ {
+ assertTrue(ARQ.isTrueOrUndef(ARQ.optTopNSorting)) ;
+ String queryString = "SELECT DISTINCT * { ?s ?p ?o } ORDER BY ?p ?o LIMIT 4242" ;
+ String opExpectedString =
+ "(slice _ 4242\n" +
+ " (reduced\n" +
+ " (order (?p ?o)\n" +
+ " (bgp (triple ?s ?p ?o)))))" ;
+ check(queryString, opExpectedString) ;
+ }
+
@Test public void distinct_to_reduced_01()
{
assertTrue(ARQ.isTrueOrUndef(ARQ.optDistinctToReduced)) ;
String queryString = "SELECT DISTINCT * { ?s ?p ?o } ORDER BY ?p ?o" ;
String opExpectedString =
- "(reduced \n" +
+ "(reduced\n" +
" (order (?p ?o)\n" +
" (bgp (triple ?s ?p ?o))))" ;
check(queryString, opExpectedString) ;
@@ -183,7 +206,7 @@ public class TestOptimizer extends BaseT
assertTrue(ARQ.isFalse(ARQ.optDistinctToReduced)) ;
String queryString = "SELECT DISTINCT * { ?s ?p ?o } ORDER BY ?p ?o" ;
String opExpectedString =
- "(distinct \n" +
+ "(distinct\n" +
" (order (?p ?o)\n" +
" (bgp (triple ?s ?p ?o))))" ;
check(queryString, opExpectedString) ;
Modified: incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java?rev=1164576&r1=1164575&r2=1164576&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java Fri Sep 2 15:16:01 2011
@@ -18,16 +18,17 @@
package com.hp.hpl.jena.sparql.algebra.optimize;
-import com.hp.hpl.jena.query.Query;
-import com.hp.hpl.jena.sparql.algebra.Op;
-import com.hp.hpl.jena.sparql.algebra.TransformCopy;
-import com.hp.hpl.jena.sparql.algebra.op.OpOrder;
-import com.hp.hpl.jena.sparql.algebra.op.OpSlice;
-import com.hp.hpl.jena.sparql.algebra.op.OpTopN;
+import com.hp.hpl.jena.query.Query ;
+import com.hp.hpl.jena.sparql.algebra.Op ;
+import com.hp.hpl.jena.sparql.algebra.TransformCopy ;
+import com.hp.hpl.jena.sparql.algebra.op.OpDistinct ;
+import com.hp.hpl.jena.sparql.algebra.op.OpOrder ;
+import com.hp.hpl.jena.sparql.algebra.op.OpSlice ;
+import com.hp.hpl.jena.sparql.algebra.op.OpTopN ;
public class TransformTopN extends TransformCopy {
- public static final int TOPN_LIMIT_THRESHOLD = 100;
+ public static final int TOPN_LIMIT_THRESHOLD = 100 ;
@Override
public Op transform(OpSlice opSlice, Op subOp) {
@@ -35,7 +36,15 @@ public class TransformTopN extends Trans
if ( ( ( opSlice.getStart() == 0 ) || ( opSlice.getStart() == Query.NOLIMIT ) ) &&
( opSlice.getLength() < TOPN_LIMIT_THRESHOLD ) ) {
if ( subOp instanceof OpOrder ) {
- return new OpTopN(((OpOrder) subOp).getSubOp(), (int)opSlice.getLength(), ((OpOrder) subOp).getConditions()) ;
+ OpOrder opOrder = (OpOrder)subOp ;
+ return new OpTopN( opOrder.getSubOp(), (int)opSlice.getLength(), opOrder.getConditions() ) ;
+ } else if ( subOp instanceof OpDistinct ) {
+ OpDistinct opDistinct = (OpDistinct)subOp ;
+ if ( opDistinct.getSubOp() instanceof OpOrder ) {
+ OpOrder opOrder = (OpOrder)opDistinct.getSubOp() ;
+ Op opDistinct2 = OpDistinct.create(opOrder.getSubOp()) ;
+ return new OpTopN( opDistinct2, (int)opSlice.getLength(), opOrder.getConditions() ) ;
+ }
}
}
Modified: incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java?rev=1164576&r1=1164575&r2=1164576&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java Fri Sep 2 15:16:01 2011
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package com.hp.hpl.jena.sparql.engine.iterator;
import java.util.Arrays ;
@@ -28,18 +46,20 @@ public class QueryIterTopN extends Query
* This leaves the least N in the heap.
*/
private PriorityQueue<Binding> heap ;
- long limit ;
+ private long limit ;
+ private final boolean distinct ;
- public QueryIterTopN(QueryIterator qIter, List<SortCondition> conditions, long numItems, ExecutionContext context)
+ public QueryIterTopN(QueryIterator qIter, List<SortCondition> conditions, long numItems, boolean distinct, ExecutionContext context)
{
- this(qIter, new BindingComparator(conditions, context), numItems, context) ;
+ this(qIter, new BindingComparator(conditions, context), numItems, distinct, context) ;
}
- public QueryIterTopN(QueryIterator qIter, Comparator<Binding> comparator, long numItems, ExecutionContext context)
+ public QueryIterTopN(QueryIterator qIter, Comparator<Binding> comparator, long numItems, boolean distinct, ExecutionContext context)
{
super(null, context) ;
- this.embeddedIterator = qIter;
-
+ this.embeddedIterator = qIter ;
+ this.distinct = distinct ;
+
limit = numItems ;
if ( limit == Query.NOLIMIT )
limit = Long.MAX_VALUE ;
@@ -77,7 +97,7 @@ public class QueryIterTopN extends Query
{
Binding binding = qIter.next() ;
if ( heap.size() < limit )
- heap.add(binding) ;
+ add(binding) ;
else {
Binding currentMaxLeastN = heap.peek() ;
@@ -85,7 +105,7 @@ public class QueryIterTopN extends Query
{
// If binding is less than current Nth least ...
heap.poll() ; // Drop Nth least.
- heap.add(binding) ;
+ add(binding) ;
}
}
}
@@ -97,4 +117,18 @@ public class QueryIterTopN extends Query
return iter ;
}
};
- }}
+ }
+
+ private void add (Binding binding)
+ {
+ if ( distinct )
+ {
+ if ( !heap.contains(binding) ) heap.add(binding) ;
+ }
+ else
+ {
+ heap.add(binding) ;
+ }
+ }
+
+}
\ No newline at end of file
Modified: incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java?rev=1164576&r1=1164575&r2=1164576&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java Fri Sep 2 15:16:01 2011
@@ -383,8 +383,15 @@ public class OpExecutor
protected QueryIterator execute(OpTopN opTop, QueryIterator input)
{
- QueryIterator qIter = executeOp(opTop.getSubOp(), input) ;
- qIter = new QueryIterTopN(qIter, opTop.getConditions(), opTop.getLimit(), execCxt) ;
+ QueryIterator qIter = null ;
+ if ( opTop.getSubOp() instanceof OpDistinct ) {
+ OpDistinct opDistinct = (OpDistinct)opTop.getSubOp() ;
+ qIter = executeOp(opDistinct.getSubOp(), input) ;
+ qIter = new QueryIterTopN(qIter, opTop.getConditions(), opTop.getLimit(), true, execCxt) ;
+ } else {
+ qIter = executeOp(opTop.getSubOp(), input) ;
+ qIter = new QueryIterTopN(qIter, opTop.getConditions(), opTop.getLimit(), false, execCxt) ;
+ }
return qIter ;
}