You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2013/04/19 21:04:05 UTC
svn commit: r1469989 - in /jena/trunk/jena-arq/src:
main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java
test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
Author: rvesse
Date: Fri Apr 19 19:04:01 2013
New Revision: 1469989
URL: http://svn.apache.org/r1469989
Log:
Further work on JENA-441
Generalize the optimization to deal with ORDER BY + REDUCED as well as ORDER BY + DISTINCT
Generalize the optimization to allow any sort conditions provided all mentioned variables exist in the projected variables
Add additional test cases to validate
Modified:
jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java
jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java?rev=1469989&r1=1469988&r2=1469989&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java Fri Apr 19 19:04:01 2013
@@ -18,63 +18,93 @@
package com.hp.hpl.jena.sparql.algebra.optimize;
+import java.util.List;
+
+import com.hp.hpl.jena.query.ARQ;
import com.hp.hpl.jena.query.SortCondition;
import com.hp.hpl.jena.sparql.algebra.Op;
import com.hp.hpl.jena.sparql.algebra.TransformCopy;
import com.hp.hpl.jena.sparql.algebra.op.OpDistinct;
import com.hp.hpl.jena.sparql.algebra.op.OpOrder;
import com.hp.hpl.jena.sparql.algebra.op.OpProject;
+import com.hp.hpl.jena.sparql.algebra.op.OpReduced;
+import com.hp.hpl.jena.sparql.core.Var;
/**
- * Prototype transformer motivated by JENA-441
* <p>
- * This is a limited optimization that applies in the case where you have a query that meets the following conditions:
+ * Improved optimization for {@code ORDER BY} plus {@code DISTINCT} or
+ * {@code REDUCED} combinations, see JENA-441 for original proposal and
+ * discussion.
+ * </p>
+ * <p>
+ * This optimization is enabled by default as with most ARQ optimizations and
+ * may be disabled by setting the symbol
+ * {@link ARQ#optOrderByDistinctApplication} to false.
+ * </p>
+ * <h3>Optimization Applicability</h3>
+ * <p>
+ * This is a limited optimization that applies in the case where you have a
+ * query that meets the following conditions:
* </p>
* <ul>
- * <li>Uses both ORDER BY and DISTINCT on the same level of the query</li>
- * <li>The list of order conditions is only simple variables</li>
- * <li>There is a fixed list of simple variables to project that correspond precisely to the order conditions
+ * <li>Uses both {@code ORDER BY} and {@code DISTINCT} or {@code REDUCED} on the
+ * same level of the query</li>
+ * <li>There is a fixed list of variables to project i.e. not {@code SELECT *}</li>
+ * <li>{@code ORDER BY} conditions only use variables present in the project
+ * list</li>
* </ul>
* <p>
* Essentially this takes algebras of the following form:
* </p>
- * <code>
+ *
+ * <pre>
* (distinct
* (project (?var)
* (order (?var)
* ... )))
- * </code>
+ * </pre>
* <p>
* And produces algebra of the following form:
* </p>
- * <code>
+ *
+ * <pre>
* (order (?var)
* (distinct
* (project (?var)
* ... )))
- * </code>
+ * </pre>
+ * <p>
+ * In the general case this in unsafe because it would change the semantics of
+ * the query since {@code ORDER BY} can access variables that are not projected.
+ * However if the conditions outlined are met then this optimization is safe,
+ * the algebras will be semantically equivalent and the resulting form likely
+ * significantly more performant, of course YMMV depending on how much data you
+ * are querying.
+ * </p>
*/
public class TransformOrderByDistinctAppplication extends TransformCopy {
@Override
public Op transform(OpDistinct opDistinct, Op subOp) {
- if (subOp instanceof OpProject)
- {
- OpProject project = (OpProject)subOp;
- //At the project stage everything is a simple variable
- //Inner operation must be an ORDER BY
- if (project.getSubOp() instanceof OpOrder)
- {
- OpOrder order = (OpOrder)project.getSubOp();
-
- //Everything we wish to order by must a simple variable and correspond exactly to the project list
- //TODO: I think this can be generalized to allow any order conditions that use variables mentioned in the project list
+ if (subOp instanceof OpProject) {
+ OpProject project = (OpProject) subOp;
+ // At the project stage everything is a simple variable
+ // Inner operation must be an ORDER BY
+ if (project.getSubOp() instanceof OpOrder) {
+ List<Var> projectVars = project.getVars();
+ OpOrder order = (OpOrder) project.getSubOp();
+
+ // Everything we wish to order by must only use variables that
+ // appear in the project list
boolean ok = true;
for (SortCondition condition : order.getConditions()) {
- if (!condition.getExpression().isVariable()) ok = false;
+ if (!isValidSortCondition(condition, projectVars)) {
+ ok = false;
+ break;
+ }
}
-
- //Everything checks out so we can make the change
+
+ // Everything checks out so we can make the change
if (ok) {
OpProject newProject = new OpProject(order.getSubOp(), project.getVars());
OpDistinct newDistinct = new OpDistinct(newProject);
@@ -82,9 +112,62 @@ public class TransformOrderByDistinctApp
}
}
}
-
- //If we reach here then this transform is not applicable
+
+ // If we reach here then this transform is not applicable
return super.transform(opDistinct, subOp);
}
+ @Override
+ public Op transform(OpReduced opReduced, Op subOp) {
+ if (subOp instanceof OpProject) {
+ OpProject project = (OpProject) subOp;
+ // At the project stage everything is a simple variable
+ // Inner operation must be an ORDER BY
+ if (project.getSubOp() instanceof OpOrder) {
+ List<Var> projectVars = project.getVars();
+ OpOrder order = (OpOrder) project.getSubOp();
+
+ // Everything we wish to order by must only use variables that
+ // appear in the project list
+ boolean ok = true;
+ for (SortCondition condition : order.getConditions()) {
+ if (!isValidSortCondition(condition, projectVars)) {
+ ok = false;
+ break;
+ }
+ }
+
+ // Everything checks out so we can make the change
+ if (ok) {
+ OpProject newProject = new OpProject(order.getSubOp(), project.getVars());
+ Op newReduced = OpReduced.create(newProject);
+ return new OpOrder(newReduced, order.getConditions());
+ }
+ }
+ }
+
+ // If we reach here then this transform is not applicable
+ return super.transform(opReduced, subOp);
+ }
+
+ /**
+ * Determines whether a sort condition is valid in terms of this optimizer
+ *
+ * @param cond
+ * Sort Condition
+ * @param projectVars
+ * Project Variables
+ * @return True if valid, false otherwise
+ */
+ private boolean isValidSortCondition(SortCondition cond, List<Var> projectVars) {
+ if (cond.getExpression().isVariable()) {
+ return projectVars.contains(cond.getExpression().asVar());
+ } else {
+ for (Var v : cond.getExpression().getVarsMentioned()) {
+ if (!projectVars.contains(v))
+ return false;
+ }
+ return true;
+ }
+ }
}
Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java?rev=1469989&r1=1469988&r2=1469989&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java (original)
+++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java Fri Apr 19 19:04:01 2013
@@ -242,6 +242,79 @@ public class TestOptimizer extends BaseT
check(queryString, opExpectedString) ;
}
+ @Test public void distinct_order_by_application_04()
+ {
+ // The optimization still applies when order conditions are not simple variables
+ // provided every variable used in an expression appears in the project list
+ assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ;
+ String queryString = "SELECT DISTINCT ?p { ?s ?p ?o } ORDER BY LCASE(STR(?p))";
+ String opExpectedString =
+ "(order ((lcase (str (?p))))\n" +
+ " (distinct\n" +
+ " (project (?p)\n" +
+ " (bgp (triple ?s ?p ?o)))))" ;
+ check(queryString, opExpectedString) ;
+ }
+
+ @Test public void distinct_order_by_application_05()
+ {
+ // The optimization still applies when order conditions are not simple variables
+ // provided every variable used in an expression appears in the project list
+ assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ;
+ String queryString = "SELECT DISTINCT ?s ?p { ?s ?p ?o } ORDER BY LCASE(CONCAT(?s, ?p))";
+ String opExpectedString =
+ "(order ((lcase (concat ?s ?p)))\n" +
+ " (distinct\n" +
+ " (project (?s ?p)\n" +
+ " (bgp (triple ?s ?p ?o)))))" ;
+ check(queryString, opExpectedString) ;
+ }
+
+ @Test public void distinct_order_by_application_06()
+ {
+ // The optimization can apply when order conditions are not simple variables
+ // provided every variable used in an expression appears in the project list
+ // In this case it should not apply because the condition used a variable that
+ // does not appear in the project list
+ assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ;
+ String queryString = "SELECT DISTINCT ?p { ?s ?p ?o } ORDER BY LCASE(CONCAT(?s, ?p))";
+ String opExpectedString =
+ " (distinct\n" +
+ " (project (?p)\n" +
+ " (order ((lcase (concat ?s ?p)))\n" +
+ " (bgp (triple ?s ?p ?o)))))" ;
+ check(queryString, opExpectedString) ;
+ }
+
+ @Test public void reduced_order_by_application_01()
+ {
+ assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ;
+ String queryString = "SELECT REDUCED ?p { ?s ?p ?o } ORDER BY ?p";
+ String opExpectedString =
+ "(order (?p)\n" +
+ " (reduced\n" +
+ " (project (?p)\n" +
+ " (bgp (triple ?s ?p ?o)))))" ;
+ check(queryString, opExpectedString) ;
+ }
+
+ @Test public void reduced_order_by_application_02()
+ {
+ try {
+ ARQ.setFalse(ARQ.optOrderByDistinctApplication) ;
+ assertTrue(ARQ.isFalse(ARQ.optOrderByDistinctApplication)) ;
+ String queryString = "SELECT REDUCED ?p { ?s ?p ?o } ORDER BY ?p";
+ String opExpectedString =
+ "(reduced\n" +
+ " (project (?p)\n" +
+ " (order (?p)\n" +
+ " (bgp (triple ?s ?p ?o)))))" ;
+ check(queryString, opExpectedString) ;
+ } finally {
+ ARQ.unset(ARQ.optOrderByDistinctApplication);
+ }
+ }
+
@Test public void subQueryProject_01() {
String qs = StrUtils.strjoinNL
( "SELECT *"