You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2013/04/19 21:04:05 UTC

svn commit: r1469989 - in /jena/trunk/jena-arq/src: main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java

Author: rvesse
Date: Fri Apr 19 19:04:01 2013
New Revision: 1469989

URL: http://svn.apache.org/r1469989
Log:
Further work on JENA-441
Generalize the optimization to deal with ORDER BY + REDUCED as well as ORDER BY + DISTINCT
Generalize the optimization to allow any sort conditions provided all mentioned variables exist in the projected variables
Add additional test cases to validate

Modified:
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java
    jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java?rev=1469989&r1=1469988&r2=1469989&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java Fri Apr 19 19:04:01 2013
@@ -18,63 +18,93 @@
 
 package com.hp.hpl.jena.sparql.algebra.optimize;
 
+import java.util.List;
+
+import com.hp.hpl.jena.query.ARQ;
 import com.hp.hpl.jena.query.SortCondition;
 import com.hp.hpl.jena.sparql.algebra.Op;
 import com.hp.hpl.jena.sparql.algebra.TransformCopy;
 import com.hp.hpl.jena.sparql.algebra.op.OpDistinct;
 import com.hp.hpl.jena.sparql.algebra.op.OpOrder;
 import com.hp.hpl.jena.sparql.algebra.op.OpProject;
+import com.hp.hpl.jena.sparql.algebra.op.OpReduced;
+import com.hp.hpl.jena.sparql.core.Var;
 
 /**
- * Prototype transformer motivated by JENA-441
  * <p>
- * This is a limited optimization that applies in the case where you have a query that meets the following conditions: 
+ * Improved optimization for {@code ORDER BY} plus {@code DISTINCT} or
+ * {@code REDUCED} combinations, see JENA-441 for original proposal and
+ * discussion.
+ * </p>
+ * <p>
+ * This optimization is enabled by default as with most ARQ optimizations and
+ * may be disabled by setting the symbol
+ * {@link ARQ#optOrderByDistinctApplication} to false.
+ * </p>
+ * <h3>Optimization Applicability</h3>
+ * <p>
+ * This is a limited optimization that applies in the case where you have a
+ * query that meets the following conditions:
  * </p>
  * <ul>
- * <li>Uses both ORDER BY and DISTINCT on the same level of the query</li>
- * <li>The list of order conditions is only simple variables</li>
- * <li>There is a fixed list of simple variables to project that correspond precisely to the order conditions
+ * <li>Uses both {@code ORDER BY} and {@code DISTINCT} or {@code REDUCED} on the
+ * same level of the query</li>
+ * <li>There is a fixed list of variables to project i.e. not {@code SELECT *}</li>
+ * <li>{@code ORDER BY} conditions only use variables present in the project
+ * list</li>
  * </ul>
  * <p>
  * Essentially this takes algebras of the following form:
  * </p>
- * <code>
+ * 
+ * <pre>
  * (distinct 
  *   (project (?var) 
  *     (order (?var) 
  *       ... )))
- * </code>
+ * </pre>
  * <p>
  * And produces algebra of the following form:
  * </p>
- * <code>
+ * 
+ * <pre>
  * (order (?var)
  *   (distinct 
  *     (project (?var) 
  *       ... )))
- * </code>
+ * </pre>
+ * <p>
+ * In the general case this in unsafe because it would change the semantics of
+ * the query since {@code ORDER BY} can access variables that are not projected.
+ * However if the conditions outlined are met then this optimization is safe,
+ * the algebras will be semantically equivalent and the resulting form likely
+ * significantly more performant, of course YMMV depending on how much data you
+ * are querying.
+ * </p>
  */
 public class TransformOrderByDistinctAppplication extends TransformCopy {
 
     @Override
     public Op transform(OpDistinct opDistinct, Op subOp) {
-        if (subOp instanceof OpProject)
-        {
-            OpProject project = (OpProject)subOp;
-            //At the project stage everything is a simple variable
-            //Inner operation must be an ORDER BY
-            if (project.getSubOp() instanceof OpOrder)
-            {
-                OpOrder order = (OpOrder)project.getSubOp();
-                
-                //Everything we wish to order by must a simple variable and correspond exactly to the project list
-                //TODO: I think this can be generalized to allow any order conditions that use variables mentioned in the project list
+        if (subOp instanceof OpProject) {
+            OpProject project = (OpProject) subOp;
+            // At the project stage everything is a simple variable
+            // Inner operation must be an ORDER BY
+            if (project.getSubOp() instanceof OpOrder) {
+                List<Var> projectVars = project.getVars();
+                OpOrder order = (OpOrder) project.getSubOp();
+
+                // Everything we wish to order by must only use variables that
+                // appear in the project list
                 boolean ok = true;
                 for (SortCondition condition : order.getConditions()) {
-                    if (!condition.getExpression().isVariable()) ok = false;
+                    if (!isValidSortCondition(condition, projectVars)) {
+                        ok = false;
+                        break;
+                    }
                 }
-                
-                //Everything checks out so we can make the change
+
+                // Everything checks out so we can make the change
                 if (ok) {
                     OpProject newProject = new OpProject(order.getSubOp(), project.getVars());
                     OpDistinct newDistinct = new OpDistinct(newProject);
@@ -82,9 +112,62 @@ public class TransformOrderByDistinctApp
                 }
             }
         }
-        
-        //If we reach here then this transform is not applicable
+
+        // If we reach here then this transform is not applicable
         return super.transform(opDistinct, subOp);
     }
 
+    @Override
+    public Op transform(OpReduced opReduced, Op subOp) {
+        if (subOp instanceof OpProject) {
+            OpProject project = (OpProject) subOp;
+            // At the project stage everything is a simple variable
+            // Inner operation must be an ORDER BY
+            if (project.getSubOp() instanceof OpOrder) {
+                List<Var> projectVars = project.getVars();
+                OpOrder order = (OpOrder) project.getSubOp();
+
+                // Everything we wish to order by must only use variables that
+                // appear in the project list
+                boolean ok = true;
+                for (SortCondition condition : order.getConditions()) {
+                    if (!isValidSortCondition(condition, projectVars)) {
+                        ok = false;
+                        break;
+                    }
+                }
+
+                // Everything checks out so we can make the change
+                if (ok) {
+                    OpProject newProject = new OpProject(order.getSubOp(), project.getVars());
+                    Op newReduced = OpReduced.create(newProject);
+                    return new OpOrder(newReduced, order.getConditions());
+                }
+            }
+        }
+
+        // If we reach here then this transform is not applicable
+        return super.transform(opReduced, subOp);
+    }
+
+    /**
+     * Determines whether a sort condition is valid in terms of this optimizer
+     * 
+     * @param cond
+     *            Sort Condition
+     * @param projectVars
+     *            Project Variables
+     * @return True if valid, false otherwise
+     */
+    private boolean isValidSortCondition(SortCondition cond, List<Var> projectVars) {
+        if (cond.getExpression().isVariable()) {
+            return projectVars.contains(cond.getExpression().asVar());
+        } else {
+            for (Var v : cond.getExpression().getVarsMentioned()) {
+                if (!projectVars.contains(v))
+                    return false;
+            }
+            return true;
+        }
+    }
 }

Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java?rev=1469989&r1=1469988&r2=1469989&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java (original)
+++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java Fri Apr 19 19:04:01 2013
@@ -242,6 +242,79 @@ public class TestOptimizer extends BaseT
         check(queryString, opExpectedString) ;
     }
     
+    @Test public void distinct_order_by_application_04()
+    {
+        // The optimization still applies when order conditions are not simple variables
+        // provided every variable used in an expression appears in the project list
+        assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ;
+        String queryString = "SELECT DISTINCT ?p { ?s ?p ?o } ORDER BY LCASE(STR(?p))";
+        String opExpectedString =
+            "(order ((lcase (str (?p))))\n" +
+            "  (distinct\n" +
+            "    (project (?p)\n" +
+            "      (bgp (triple ?s ?p ?o)))))" ;
+        check(queryString, opExpectedString) ;
+    }
+    
+    @Test public void distinct_order_by_application_05()
+    {
+        // The optimization still applies when order conditions are not simple variables
+        // provided every variable used in an expression appears in the project list
+        assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ;
+        String queryString = "SELECT DISTINCT ?s ?p { ?s ?p ?o } ORDER BY LCASE(CONCAT(?s, ?p))";
+        String opExpectedString =
+            "(order ((lcase (concat ?s ?p)))\n" +
+            "  (distinct\n" +
+            "    (project (?s ?p)\n" +
+            "      (bgp (triple ?s ?p ?o)))))" ;
+        check(queryString, opExpectedString) ;
+    }
+    
+    @Test public void distinct_order_by_application_06()
+    {
+        // The optimization can apply when order conditions are not simple variables
+        // provided every variable used in an expression appears in the project list
+        // In this case it should not apply because the condition used a variable that
+        // does not appear in the project list
+        assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ;
+        String queryString = "SELECT DISTINCT ?p { ?s ?p ?o } ORDER BY LCASE(CONCAT(?s, ?p))";
+        String opExpectedString =
+            "  (distinct\n" +
+            "    (project (?p)\n" +
+            "      (order ((lcase (concat ?s ?p)))\n" +
+            "      (bgp (triple ?s ?p ?o)))))" ;
+        check(queryString, opExpectedString) ;
+    }
+    
+    @Test public void reduced_order_by_application_01()
+    {
+        assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ;
+        String queryString = "SELECT REDUCED ?p { ?s ?p ?o } ORDER BY ?p";
+        String opExpectedString =
+            "(order (?p)\n" +
+            "  (reduced\n" +
+            "    (project (?p)\n" +
+            "      (bgp (triple ?s ?p ?o)))))" ;
+        check(queryString, opExpectedString) ;
+    }
+    
+    @Test public void reduced_order_by_application_02()
+    {
+        try {
+            ARQ.setFalse(ARQ.optOrderByDistinctApplication) ;
+            assertTrue(ARQ.isFalse(ARQ.optOrderByDistinctApplication)) ;
+            String queryString = "SELECT REDUCED ?p { ?s ?p ?o } ORDER BY ?p";
+            String opExpectedString =
+                "(reduced\n" +
+                "  (project (?p)\n" +
+                "    (order (?p)\n" +
+                "      (bgp (triple ?s ?p ?o)))))" ;
+            check(queryString, opExpectedString) ;
+        } finally {
+            ARQ.unset(ARQ.optOrderByDistinctApplication);
+        }
+    }
+    
     @Test public void subQueryProject_01() {
         String qs = StrUtils.strjoinNL
             ( "SELECT *"