You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2013/04/19 23:01:08 UTC

svn commit: r1470043 - in /jena/trunk/jena-arq: ReleaseNotes.txt src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformDistinctToReduced.java src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java

Author: rvesse
Date: Fri Apr 19 21:01:08 2013
New Revision: 1470043

URL: http://svn.apache.org/r1470043
Log:
Expand DISTINCT to REDUCED optimization to cope with SELECT DISTINCT ?var { } ORDER BY ?var style queries (JENA-441)

Modified:
    jena/trunk/jena-arq/ReleaseNotes.txt
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformDistinctToReduced.java
    jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java

Modified: jena/trunk/jena-arq/ReleaseNotes.txt
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/ReleaseNotes.txt?rev=1470043&r1=1470042&r2=1470043&view=diff
==============================================================================
--- jena/trunk/jena-arq/ReleaseNotes.txt (original)
+++ jena/trunk/jena-arq/ReleaseNotes.txt Fri Apr 19 21:01:08 2013
@@ -24,6 +24,8 @@ ChangeLog for ARQ
   now closed
 + New ResultSetPeekable interface extends ResultSet with peeking capabilities, ResultSetMem implements
   this and ResultSetPeeking is provided as an implementing wrapper over an arbitrary ResultSet
++ JENA-441 : New optimizations for ORDER BY + DISTINCT/REDUCED combinations that give substantially better
+  performance for some queries
 
 ==== Jena 2.10.0
 

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformDistinctToReduced.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformDistinctToReduced.java?rev=1470043&r1=1470042&r2=1470043&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformDistinctToReduced.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformDistinctToReduced.java Fri Apr 19 21:01:08 2013
@@ -18,22 +18,49 @@
 
 package com.hp.hpl.jena.sparql.algebra.optimize;
 
-import com.hp.hpl.jena.sparql.algebra.Op ;
-import com.hp.hpl.jena.sparql.algebra.TransformCopy ;
-import com.hp.hpl.jena.sparql.algebra.op.OpDistinct ;
-import com.hp.hpl.jena.sparql.algebra.op.OpOrder ;
-import com.hp.hpl.jena.sparql.algebra.op.OpReduced ;
+import com.hp.hpl.jena.sparql.algebra.Op;
+import com.hp.hpl.jena.sparql.algebra.TransformCopy;
+import com.hp.hpl.jena.sparql.algebra.op.OpDistinct;
+import com.hp.hpl.jena.sparql.algebra.op.OpOrder;
+import com.hp.hpl.jena.sparql.algebra.op.OpProject;
+import com.hp.hpl.jena.sparql.algebra.op.OpReduced;
 
+/**
+ * <p>
+ * Transforms generic {@code DISTINCT} plus {@code ORDER BY} combinations to
+ * {@code REDUCED} plus {@code ORDER BY} which typically gives better
+ * performance and memory consumption because engines have to keep less data
+ * in-memory to evaluate it.
+ * </p>
+ * <p>
+ * See also {@link TransformOrderByDistinctAppplication} which is a better
+ * optimization for these kinds of queries but only applies to a limited
+ * range of queries. Where possible that optimization is applied in preference
+ * to this one.
+ * </p>
+ * <p>
+ * {@link TransformTopN} covers the case of {@code DISTINCT} plus
+ * {@code ORDER BY} where there is also a {@code LIMIT}. Where possible that
+ * optimization is applied in preference to either this or
+ * {@link TransformOrderByDistinctAppplication}.
+ * </p>
+ * 
+ */
 public class TransformDistinctToReduced extends TransformCopy {
 
     // Ideally this runs after TransformTopN, but the two transforms are order independent
-    // TopN of "reduced or distinct of order" is handled. 
+    // TopN of "reduced or distinct of order" is handled.
     @Override
-    public Op transform(OpDistinct opDistinct, Op subOp) { 
-        if ( subOp instanceof OpOrder ) {
-            return OpReduced.create(subOp) ;
+    public Op transform(OpDistinct opDistinct, Op subOp) {
+        if (subOp instanceof OpOrder) {
+            return OpReduced.create(subOp);
+        } else if (subOp instanceof OpProject) {
+            OpProject project = (OpProject) subOp;
+            if (project.getSubOp() instanceof OpOrder) {
+                return OpReduced.create(subOp);
+            }
         }
-        return super.transform(opDistinct, subOp) ; 
+        return super.transform(opDistinct, subOp);
     }
 
 }

Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java?rev=1470043&r1=1470042&r2=1470043&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java (original)
+++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java Fri Apr 19 21:01:08 2013
@@ -200,6 +200,18 @@ public class TestOptimizer extends BaseT
         }
     }
     
+    @Test public void distinct_to_reduced_03()
+    {
+        assertTrue(ARQ.isTrueOrUndef(ARQ.optDistinctToReduced)) ;
+        String queryString = "SELECT DISTINCT ?p { ?s ?p ?o } ORDER BY ?p ?o"  ;  
+        String opExpectedString = 
+            "(reduced\n" + 
+            "  (project (?p)\n" +
+            "    (order (?p ?o)\n" +
+            "      (bgp (triple ?s ?p ?o)))))" ; 
+        check(queryString, opExpectedString) ;
+    }
+    
     @Test public void distinct_order_by_application_01()
     {
         assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ;
@@ -216,6 +228,7 @@ public class TestOptimizer extends BaseT
     {
         try {
             ARQ.setFalse(ARQ.optOrderByDistinctApplication) ;
+            ARQ.setFalse(ARQ.optDistinctToReduced) ;
             assertTrue(ARQ.isFalse(ARQ.optOrderByDistinctApplication)) ;
             String queryString = "SELECT DISTINCT ?p { ?s ?p ?o } ORDER BY ?p";
             String opExpectedString =
@@ -226,6 +239,7 @@ public class TestOptimizer extends BaseT
             check(queryString, opExpectedString) ;
         } finally {
             ARQ.unset(ARQ.optOrderByDistinctApplication);
+            ARQ.unset(ARQ.optDistinctToReduced);
         }
     }
     
@@ -276,10 +290,11 @@ public class TestOptimizer extends BaseT
         // provided every variable used in an expression appears in the project list
         // In this case it should not apply because the condition used a variable that
         // does not appear in the project list
+        // However the DISTINCT to REDUCED optimization does apply
         assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ;
         String queryString = "SELECT DISTINCT ?p { ?s ?p ?o } ORDER BY LCASE(CONCAT(?s, ?p))";
         String opExpectedString =
-            "  (distinct\n" +
+            "  (reduced\n" +
             "    (project (?p)\n" +
             "      (order ((lcase (concat ?s ?p)))\n" +
             "      (bgp (triple ?s ?p ?o)))))" ;