You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2013/04/19 20:21:35 UTC

svn commit: r1469981 - in /jena/trunk/jena-arq/src: main/java/com/hp/hpl/jena/query/ main/java/com/hp/hpl/jena/sparql/algebra/optimize/ test/java/com/hp/hpl/jena/sparql/algebra/optimize/

Author: rvesse
Date: Fri Apr 19 18:21:34 2013
New Revision: 1469981

URL: http://svn.apache.org/r1469981
Log:
Initial addition of new improved optimization for the ORDER BY+DISTINCT case (JENA-441)

Added:
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java
Modified:
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/query/ARQ.java
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/Optimize.java
    jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/query/ARQ.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/query/ARQ.java?rev=1469981&r1=1469980&r2=1469981&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/query/ARQ.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/query/ARQ.java Fri Apr 19 18:21:34 2013
@@ -24,6 +24,7 @@ import org.slf4j.LoggerFactory ;
 
 import com.hp.hpl.jena.sparql.ARQConstants ;
 import com.hp.hpl.jena.sparql.SystemARQ ;
+import com.hp.hpl.jena.sparql.algebra.optimize.TransformOrderByDistinctAppplication;
 import com.hp.hpl.jena.sparql.engine.main.StageBuilder ;
 import com.hp.hpl.jena.sparql.expr.nodevalue.XSDFuncOp ;
 import com.hp.hpl.jena.sparql.lib.Metadata ;
@@ -285,10 +286,20 @@ public class ARQ
     public static final Symbol optTopNSorting = ARQConstants.allocSymbol("optTopNSorting") ;
     
     /** 
-     *  Context key controlling whether a DISTINCT-ORDER BY query is done replacing the distinct with a reduced.
+     *  Context key controlling whether a DISTINCT-ORDER BY query is done by replacing the distinct with a reduced.
      *  Default is "true" - the reduced operator does not need to keep a data structure with all previously seen bindings.
      */  
     public static final Symbol optDistinctToReduced = ARQConstants.allocSymbol("optDistinctToReduced") ;
+    
+    /**
+     * Context key controlling whether a DISTINCT-ORDER BY query is done by applying the ORDER BY after the DISTINCT
+     * when default SPARQL semantics usually mean ORDER BY applies before DISTINCT.  This optimization applies only
+     * in a subset of cases unlike the more general {@link #optDistinctToReduced} optimization.
+     * <p>
+     * See {@link TransformOrderByDistinctAppplication} for more discussion on exactly when this may apply
+     * </p>
+     */
+    public static final Symbol optOrderByDistinctApplication = ARQConstants.allocSymbol("optOrderByDistinctApplication");
 
     /** 
      *  Context key controlling whether the standard optimizer applies

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/Optimize.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/Optimize.java?rev=1469981&r1=1469980&r2=1469981&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/Optimize.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/Optimize.java Fri Apr 19 18:21:34 2013
@@ -186,12 +186,19 @@ public class Optimize implements Rewrite
         
         if ( context.isTrueOrUndef(ARQ.optTopNSorting) )
         	op = apply("TopN Sorting", new TransformTopN(), op) ;
+        
+        // ORDER BY+DISTINCT optimizations
+        // We apply the one that changes evaluation order first since when it does apply it will give much
+        // better performance than just transforming DISTINCT to REDUCED
+        
+        if ( context.isTrueOrUndef(ARQ.optOrderByDistinctApplication) )
+            op = apply("Apply DISTINCT prior to ORDER BY where possible", new TransformOrderByDistinctAppplication(), op);
 
         if ( context.isTrueOrUndef(ARQ.optDistinctToReduced) )
             op = apply("Distinct replaced with reduced", new TransformDistinctToReduced(), op) ;
         
         // Convert paths to triple patterns. 
-        // Also done in the AlgebraGenerator so this transform step catches programattically built op expressions 
+        // Also done in the AlgebraGenerator so this transform step catches programmatically built op expressions 
         op = apply("Path flattening", new TransformPathFlattern(), op) ;
         
         // Find joins/leftJoin that can be done by index joins (generally preferred as fixed memory overhead).

Added: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java?rev=1469981&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java (added)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java Fri Apr 19 18:21:34 2013
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hp.hpl.jena.sparql.algebra.optimize;
+
+import com.hp.hpl.jena.query.SortCondition;
+import com.hp.hpl.jena.sparql.algebra.Op;
+import com.hp.hpl.jena.sparql.algebra.TransformCopy;
+import com.hp.hpl.jena.sparql.algebra.op.OpDistinct;
+import com.hp.hpl.jena.sparql.algebra.op.OpOrder;
+import com.hp.hpl.jena.sparql.algebra.op.OpProject;
+
+/**
+ * Prototype transformer motivated by JENA-441
+ * <p>
+ * This is a limited optimization that applies in the case where you have a query that meets the following conditions: 
+ * </p>
+ * <ul>
+ * <li>Uses both ORDER BY and DISTINCT on the same level of the query</li>
+ * <li>The list of order conditions is only simple variables</li>
+ * <li>There is a fixed list of simple variables to project that correspond precisely to the order conditions
+ * </ul>
+ * <p>
+ * Essentially this takes algebras of the following form:
+ * </p>
+ * <code>
+ * (distinct 
+ *   (project (?var) 
+ *     (order (?var) 
+ *       ... )))
+ * </code>
+ * <p>
+ * And produces algebra of the following form:
+ * </p>
+ * <code>
+ * (order (?var)
+ *   (distinct 
+ *     (project (?var) 
+ *       ... )))
+ * </code>
+ */
+public class TransformOrderByDistinctAppplication extends TransformCopy {
+
+    @Override
+    public Op transform(OpDistinct opDistinct, Op subOp) {
+        if (subOp instanceof OpProject)
+        {
+            OpProject project = (OpProject)subOp;
+            //At the project stage everything is a simple variable
+            //Inner operation must be an ORDER BY
+            if (project.getSubOp() instanceof OpOrder)
+            {
+                OpOrder order = (OpOrder)project.getSubOp();
+                
+                //Everything we wish to order by must a simple variable and correspond exactly to the project list
+                //TODO: I think this can be generalized to allow any order conditions that use variables mentioned in the project list
+                boolean ok = true;
+                for (SortCondition condition : order.getConditions()) {
+                    if (!condition.getExpression().isVariable()) ok = false;
+                }
+                
+                //Everything checks out so we can make the change
+                if (ok) {
+                    OpProject newProject = new OpProject(order.getSubOp(), project.getVars());
+                    OpDistinct newDistinct = new OpDistinct(newProject);
+                    return new OpOrder(newDistinct, order.getConditions());
+                }
+            }
+        }
+        
+        //If we reach here then this transform is not applicable
+        return super.transform(opDistinct, subOp);
+    }
+
+}

Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java?rev=1469981&r1=1469980&r2=1469981&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java (original)
+++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java Fri Apr 19 18:21:34 2013
@@ -200,6 +200,48 @@ public class TestOptimizer extends BaseT
         }
     }
     
+    @Test public void distinct_order_by_application_01()
+    {
+        assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ;
+        String queryString = "SELECT DISTINCT ?p { ?s ?p ?o } ORDER BY ?p";
+        String opExpectedString =
+            "(order (?p)\n" +
+            "  (distinct\n" +
+            "    (project (?p)\n" +
+            "      (bgp (triple ?s ?p ?o)))))" ;
+        check(queryString, opExpectedString) ;
+    }
+    
+    @Test public void distinct_order_by_application_02()
+    {
+        try {
+            ARQ.setFalse(ARQ.optOrderByDistinctApplication) ;
+            assertTrue(ARQ.isFalse(ARQ.optOrderByDistinctApplication)) ;
+            String queryString = "SELECT DISTINCT ?p { ?s ?p ?o } ORDER BY ?p";
+            String opExpectedString =
+                "(distinct\n" +
+                "  (project (?p)\n" +
+                "    (order (?p)\n" +
+                "      (bgp (triple ?s ?p ?o)))))" ;
+            check(queryString, opExpectedString) ;
+        } finally {
+            ARQ.unset(ARQ.optOrderByDistinctApplication);
+        }
+    }
+    
+    @Test public void distinct_order_by_application_03()
+    {
+        // Evaluation reordering optimization doesn't apply if it's a SELECT *
+        // However the DISTINCT to REDUCED transformation still applies
+        assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ;
+        String queryString = "SELECT DISTINCT * { ?s ?p ?o } ORDER BY ?p";
+        String opExpectedString =
+            "  (reduced\n" +
+            "    (order (?p)\n" +
+            "      (bgp (triple ?s ?p ?o))))" ;
+        check(queryString, opExpectedString) ;
+    }
+    
     @Test public void subQueryProject_01() {
         String qs = StrUtils.strjoinNL
             ( "SELECT *"
@@ -263,6 +305,7 @@ public class TestOptimizer extends BaseT
         Op opOptimize = Algebra.optimize(opToOptimize) ;
         Op opExpected = SSE.parseOp(opExpectedString) ;
         assertEquals(opExpected, opOptimize) ;
+
     }
     
 }