You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2013/04/19 23:01:08 UTC
svn commit: r1470043 - in /jena/trunk/jena-arq: ReleaseNotes.txt
src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformDistinctToReduced.java
src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
Author: rvesse
Date: Fri Apr 19 21:01:08 2013
New Revision: 1470043
URL: http://svn.apache.org/r1470043
Log:
Expand DISTINCT to REDUCED optimization to cope with SELECT DISTINCT ?var { } ORDER BY ?var style queries (JENA-441)
Modified:
jena/trunk/jena-arq/ReleaseNotes.txt
jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformDistinctToReduced.java
jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
Modified: jena/trunk/jena-arq/ReleaseNotes.txt
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/ReleaseNotes.txt?rev=1470043&r1=1470042&r2=1470043&view=diff
==============================================================================
--- jena/trunk/jena-arq/ReleaseNotes.txt (original)
+++ jena/trunk/jena-arq/ReleaseNotes.txt Fri Apr 19 21:01:08 2013
@@ -24,6 +24,8 @@ ChangeLog for ARQ
now closed
+ New ResultSetPeekable interface extends ResultSet with peeking capabilities, ResultSetMem implements
this and ResultSetPeeking is provided as an implementing wrapper over an arbitrary ResultSet
++ JENA-441 : New optimizations for ORDER BY + DISTINCT/REDUCED combinations that give substantially better
+ performance for some queries
==== Jena 2.10.0
Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformDistinctToReduced.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformDistinctToReduced.java?rev=1470043&r1=1470042&r2=1470043&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformDistinctToReduced.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformDistinctToReduced.java Fri Apr 19 21:01:08 2013
@@ -18,22 +18,49 @@
package com.hp.hpl.jena.sparql.algebra.optimize;
-import com.hp.hpl.jena.sparql.algebra.Op ;
-import com.hp.hpl.jena.sparql.algebra.TransformCopy ;
-import com.hp.hpl.jena.sparql.algebra.op.OpDistinct ;
-import com.hp.hpl.jena.sparql.algebra.op.OpOrder ;
-import com.hp.hpl.jena.sparql.algebra.op.OpReduced ;
+import com.hp.hpl.jena.sparql.algebra.Op;
+import com.hp.hpl.jena.sparql.algebra.TransformCopy;
+import com.hp.hpl.jena.sparql.algebra.op.OpDistinct;
+import com.hp.hpl.jena.sparql.algebra.op.OpOrder;
+import com.hp.hpl.jena.sparql.algebra.op.OpProject;
+import com.hp.hpl.jena.sparql.algebra.op.OpReduced;
+/**
+ * <p>
+ * Transforms generic {@code DISTINCT} plus {@code ORDER BY} combinations to
+ * {@code REDUCED} plus {@code ORDER BY} which typically gives better
+ * performance and lower memory consumption because engines have to keep less
+ * data in memory to evaluate it.
+ * </p>
+ * <p>
+ * See also {@link TransformOrderByDistinctAppplication} which is a better
+ * optimization for these kinds of queries but only applies to a limited
+ * range of queries. Where possible that optimization is applied in preference
+ * to this one.
+ * </p>
+ * <p>
+ * {@link TransformTopN} covers the case of {@code DISTINCT} plus
+ * {@code ORDER BY} where there is also a {@code LIMIT}. Where possible that
+ * optimization is applied in preference to either this or
+ * {@link TransformOrderByDistinctAppplication}.
+ * </p>
+ *
+ */
public class TransformDistinctToReduced extends TransformCopy {
// Ideally this runs after TransformTopN, but the two are order independent
- // TopN of "reduced or distinct of order" is handled.
+ // TopN of "reduced or distinct of order" is handled.
@Override
- public Op transform(OpDistinct opDistinct, Op subOp) {
- if ( subOp instanceof OpOrder ) {
- return OpReduced.create(subOp) ;
+ public Op transform(OpDistinct opDistinct, Op subOp) {
+ if (subOp instanceof OpOrder) {
+ return OpReduced.create(subOp);
+ } else if (subOp instanceof OpProject) {
+ OpProject project = (OpProject) subOp;
+ if (project.getSubOp() instanceof OpOrder) {
+ return OpReduced.create(subOp);
+ }
}
- return super.transform(opDistinct, subOp) ;
+ return super.transform(opDistinct, subOp);
}
}
Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java?rev=1470043&r1=1470042&r2=1470043&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java (original)
+++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java Fri Apr 19 21:01:08 2013
@@ -200,6 +200,18 @@ public class TestOptimizer extends BaseT
}
}
+ @Test public void distinct_to_reduced_03()
+ {
+ assertTrue(ARQ.isTrueOrUndef(ARQ.optDistinctToReduced)) ;
+ String queryString = "SELECT DISTINCT ?p { ?s ?p ?o } ORDER BY ?p ?o" ;
+ String opExpectedString =
+ "(reduced\n" +
+ " (project (?p)\n" +
+ " (order (?p ?o)\n" +
+ " (bgp (triple ?s ?p ?o)))))" ;
+ check(queryString, opExpectedString) ;
+ }
+
@Test public void distinct_order_by_application_01()
{
assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ;
@@ -216,6 +228,7 @@ public class TestOptimizer extends BaseT
{
try {
ARQ.setFalse(ARQ.optOrderByDistinctApplication) ;
+ ARQ.setFalse(ARQ.optDistinctToReduced) ;
assertTrue(ARQ.isFalse(ARQ.optOrderByDistinctApplication)) ;
String queryString = "SELECT DISTINCT ?p { ?s ?p ?o } ORDER BY ?p";
String opExpectedString =
@@ -226,6 +239,7 @@ public class TestOptimizer extends BaseT
check(queryString, opExpectedString) ;
} finally {
ARQ.unset(ARQ.optOrderByDistinctApplication);
+ ARQ.unset(ARQ.optDistinctToReduced);
}
}
@@ -276,10 +290,11 @@ public class TestOptimizer extends BaseT
// provided every variable used in an expression appears in the project list
// In this case it should not apply because the condition used a variable that
// does not appear in the project list
+ // However the DISTINCT to REDUCED optimization does apply
assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ;
String queryString = "SELECT DISTINCT ?p { ?s ?p ?o } ORDER BY LCASE(CONCAT(?s, ?p))";
String opExpectedString =
- " (distinct\n" +
+ " (reduced\n" +
" (project (?p)\n" +
" (order ((lcase (concat ?s ?p)))\n" +
" (bgp (triple ?s ?p ?o)))))" ;