You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2014/01/06 12:23:26 UTC
svn commit: r1555717 - in /jena/trunk/jena-arq/src:
main/java/com/hp/hpl/jena/sparql/algebra/optimize/
test/java/com/hp/hpl/jena/sparql/algebra/optimize/
Author: rvesse
Date: Mon Jan 6 11:23:25 2014
New Revision: 1555717
URL: http://svn.apache.org/r1555717
Log:
Initial implementation of new filter inequality optimization (JENA-615)
Provides a Transform implementation and some unit tests, not yet integrated into standard optimizer
Added:
jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterInequality.java
Modified:
jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/AbstractTestTransform.java
jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java
Added: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterInequality.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterInequality.java?rev=1555717&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterInequality.java (added)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterInequality.java Mon Jan 6 11:23:25 2014
@@ -0,0 +1,417 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hp.hpl.jena.sparql.algebra.optimize;
+
+import static org.apache.jena.atlas.lib.CollectionUtils.disjoint;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.jena.atlas.lib.Pair;
+
+import com.hp.hpl.jena.query.ARQ;
+import com.hp.hpl.jena.sparql.algebra.Op;
+import com.hp.hpl.jena.sparql.algebra.OpVars;
+import com.hp.hpl.jena.sparql.algebra.Table;
+import com.hp.hpl.jena.sparql.algebra.TableFactory;
+import com.hp.hpl.jena.sparql.algebra.TransformCopy;
+import com.hp.hpl.jena.sparql.algebra.op.*;
+import com.hp.hpl.jena.sparql.core.Var;
+import com.hp.hpl.jena.sparql.core.VarExprList;
+import com.hp.hpl.jena.sparql.engine.binding.BindingFactory;
+import com.hp.hpl.jena.sparql.engine.binding.BindingMap;
+import com.hp.hpl.jena.sparql.expr.*;
+
+/**
+ * A transform that aims to optimize queries where there is an inequality
+ * constraint on a variable to speed up evaluation e.g
+ *
+ * <pre>
+ * PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ * SELECT *
+ * WHERE
+ * {
+ * ?s rdf:type <http://type> ;
+ * ?p ?o .
+ * FILTER(?p != rdf:type)
+ * }
+ * </pre>
+ *
+ * Would transform to the following:
+ *
+ * <pre>
+ * PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ * SELECT *
+ * WHERE
+ * {
+ * ?s rdf:type <http://type> ;
+ * ?p ?o .
+ * MINUS { VALUES ?p { rdf:type } }
+ * }
+ * </pre>
+ *
+ * <h3>Applicability</h3>
+ * <p>
+ * This optimizer is conservative in that it only makes the optimization where
+ * the inequality constraint is against a non-literal as otherwise substituting
+ * the value changes the query semantics because it switches from value equality
+ * to the more restrictive term equality. The optimization is safe for
+ * non-literals because for those value and term equality are equivalent (in
+ * fact value equality is defined to be term equality).
+ * </p>
+ * <p>
+ * There are also various nested algebra structures that can make the
+ * optimization unsafe and so it does not take place if any of those situations
+ * is detected.
+ * </p>
+ */
+public class TransformFilterInequality extends TransformCopy {
+ /**
+ * Creates a new transform
+ */
+ public TransformFilterInequality() {
+ }
+
+ @Override
+ public Op transform(OpFilter opFilter, Op subOp) {
+ Op op = apply(opFilter.getExprs(), subOp);
+ if (op == null)
+ return super.transform(opFilter, subOp);
+ return op;
+ }
+
+ private static Op apply(ExprList exprs, Op subOp) {
+ // ---- Find and extract any inequality filters.
+ Pair<List<Pair<Var, NodeValue>>, ExprList> p = preprocessFilterInequality(exprs);
+ if (p == null || p.getLeft().size() == 0)
+ return null;
+
+ List<Pair<Var, NodeValue>> inequalities = p.getLeft();
+ Collection<Var> varsMentioned = varsMentionedInInequalityFilters(inequalities);
+ ExprList remaining = p.getRight();
+
+ // If any of the conditions overlap the optimization is unsafe
+ // (the query is also likely incorrect but that isn't our problem)
+
+ // TODO There is actually a special case here, if the inequality
+ // constraints are conflicting then we can special case to table empty.
+
+ // ---- Check if the subOp is the right shape to transform.
+ Op op = subOp;
+
+ // Special case : deduce that the filter will always "eval unbound"
+ // hence eliminate all rows. Return the empty table.
+ if (testSpecialCaseUnused(subOp, inequalities, remaining))
+ return OpTable.empty();
+
+ // Special case: the deep left op of a OpConditional/OpLeftJoin is unit
+ // table.
+ // This is
+ // { OPTIONAL{P1} OPTIONAL{P2} ... FILTER(?x = :x) }
+ if (testSpecialCase1(subOp, inequalities, remaining)) {
+ // Find backbone of ops
+ List<Op> ops = extractOptionals(subOp);
+ ops = processSpecialCase1(ops, inequalities);
+ // Put back together
+ op = rebuild((Op2) subOp, ops);
+ // Put all filters - either we optimized, or we left alone.
+ // Either way, the complete set of filter expressions.
+ op = OpFilter.filter(exprs, op);
+ return op;
+ }
+
+ // ---- Transform
+
+ if (!safeToTransform(varsMentioned, op))
+ return null;
+
+ op = processFilterWorker(op, inequalities);
+
+ // ---- Place any filter expressions around the processed sub op.
+ if (remaining.size() > 0)
+ op = OpFilter.filter(remaining, op);
+ return op;
+ }
+
+ // --- find and extract
+ private static Pair<List<Pair<Var, NodeValue>>, ExprList> preprocessFilterInequality(ExprList exprs) {
+ List<Pair<Var, NodeValue>> exprsFilterInequality = new ArrayList<Pair<Var, NodeValue>>();
+ ExprList exprsOther = new ExprList();
+ for (Expr e : exprs.getList()) {
+ Pair<Var, NodeValue> p = preprocess(e);
+ if (p != null)
+ exprsFilterInequality.add(p);
+ else
+ exprsOther.add(e);
+ }
+ if (exprsFilterInequality.size() == 0)
+ return null;
+ return Pair.create(exprsFilterInequality, exprsOther);
+ }
+
+ private static Pair<Var, NodeValue> preprocess(Expr e) {
+ if (!(e instanceof E_NotEquals))
+ return null;
+
+ ExprFunction2 eq = (ExprFunction2) e;
+ Expr left = eq.getArg1();
+ Expr right = eq.getArg2();
+
+ Var var = null;
+ NodeValue constant = null;
+
+ if (left.isVariable() && right.isConstant()) {
+ var = left.asVar();
+ constant = right.getConstant();
+ } else if (right.isVariable() && left.isConstant()) {
+ var = right.asVar();
+ constant = left.getConstant();
+ }
+
+ if (var == null || constant == null)
+ return null;
+
+ // Final check for "!=" where a FILTER != can do value matching when the
+ // graph does not.
+ // Value based?
+ if (!ARQ.isStrictMode() && constant.isLiteral())
+ return null;
+
+ return Pair.create(var, constant);
+ }
+
+ private static Collection<Var> varsMentionedInInequalityFilters(List<Pair<Var, NodeValue>> inequalities) {
+ Set<Var> vars = new HashSet<Var>();
+ for (Pair<Var, NodeValue> p : inequalities)
+ vars.add(p.getLeft());
+ return vars;
+ }
+
+ private static boolean safeToTransform(Collection<Var> varsEquality, Op op) {
+ // TODO This may actually be overly conservative, since we aren't going
+ // to perform substitution we can potentially remove much of this method
+
+ // Structure as a visitor?
+ if (op instanceof OpBGP || op instanceof OpQuadPattern)
+ return true;
+
+ if (op instanceof OpFilter) {
+ OpFilter opf = (OpFilter) op;
+ // Expressions are always safe transform by substitution.
+ return safeToTransform(varsEquality, opf.getSubOp());
+ }
+
+ // This will be applied also in sub-calls of the Transform but queries
+ // are very rarely so deep that it matters.
+ if (op instanceof OpSequence) {
+ OpN opN = (OpN) op;
+ for (Op subOp : opN.getElements()) {
+ if (!safeToTransform(varsEquality, subOp))
+ return false;
+ }
+ return true;
+ }
+
+ if (op instanceof OpJoin || op instanceof OpUnion) {
+ Op2 op2 = (Op2) op;
+ return safeToTransform(varsEquality, op2.getLeft()) && safeToTransform(varsEquality, op2.getRight());
+ }
+
+ // Not safe unless filter variables are mentioned on the LHS.
+ if (op instanceof OpConditional || op instanceof OpLeftJoin) {
+ Op2 opleftjoin = (Op2) op;
+
+ if (!safeToTransform(varsEquality, opleftjoin.getLeft()) || !safeToTransform(varsEquality, opleftjoin.getRight()))
+ return false;
+
+ // Not only must the left and right be safe to transform,
+ // but the equality variable must be known to be always set.
+
+ // If the varsLeft are disjoint from assigned vars,
+ // we may be able to push assign down right
+ // (this generalises the unit table case specialcase1)
+ // Needs more investigation.
+
+ Op opLeft = opleftjoin.getLeft();
+ Set<Var> varsLeft = OpVars.visibleVars(opLeft);
+ if (varsLeft.containsAll(varsEquality))
+ return true;
+ return false;
+ }
+
+ if (op instanceof OpGraph) {
+ OpGraph opg = (OpGraph) op;
+ return safeToTransform(varsEquality, opg.getSubOp());
+ }
+
+ // Subquery - assume scope rewriting has already been applied.
+ if (op instanceof OpModifier) {
+ // ORDER BY?
+ OpModifier opMod = (OpModifier) op;
+ if (opMod instanceof OpProject) {
+ OpProject opProject = (OpProject) op;
+ // Writing "SELECT ?var" for "?var" -> a value would need
+ // AS-ification.
+ for (Var v : opProject.getVars()) {
+ if (varsEquality.contains(v))
+ return false;
+ }
+ }
+ return safeToTransform(varsEquality, opMod.getSubOp());
+ }
+
+ if (op instanceof OpGroup) {
+ OpGroup opGroup = (OpGroup) op;
+ VarExprList varExprList = opGroup.getGroupVars();
+ return safeToTransform(varsEquality, varExprList) && safeToTransform(varsEquality, opGroup.getSubOp());
+ }
+
+ if (op instanceof OpTable) {
+ OpTable opTable = (OpTable) op;
+ if (opTable.isJoinIdentity())
+ return true;
+ }
+
+ // Op1 - OpGroup
+ // Op1 - OpOrder
+ // Op1 - OpAssign, OpExtend
+ // Op1 - OpFilter - done.
+ // Op1 - OpLabel - easy
+ // Op1 - OpService - no.
+
+ return false;
+ }
+
+ private static boolean safeToTransform(Collection<Var> varsEquality, VarExprList varsExprList) {
+ // If the named variable is used, unsafe to rewrite.
+ return disjoint(varsExprList.getVars(), varsEquality);
+ }
+
+ // -- A special case
+
+ private static boolean testSpecialCaseUnused(Op op, List<Pair<Var, NodeValue>> equalities, ExprList remaining) {
+ // If the op does not contain the var at all, for some equality
+ // then the filter expression will be "eval unbound" i.e. false.
+ // We can return empty table.
+ Set<Var> patternVars = OpVars.visibleVars(op);
+ for (Pair<Var, NodeValue> p : equalities) {
+ if (!patternVars.contains(p.getLeft()))
+ return true;
+ }
+ return false;
+ }
+
+ // If a sequence of OPTIONALS, and nothing prior to the first, we end up
+ // with a unit table on the left side of a next of LeftJoin/conditionals.
+
+ private static boolean testSpecialCase1(Op op, List<Pair<Var, NodeValue>> equalities, ExprList remaining) {
+ while (op instanceof OpConditional || op instanceof OpLeftJoin) {
+ Op2 opleftjoin2 = (Op2) op;
+ op = opleftjoin2.getLeft();
+ }
+ return isUnitTable(op);
+ }
+
+ private static List<Op> extractOptionals(Op op) {
+ List<Op> chain = new ArrayList<Op>();
+ while (op instanceof OpConditional || op instanceof OpLeftJoin) {
+ Op2 opleftjoin2 = (Op2) op;
+ chain.add(opleftjoin2.getRight());
+ op = opleftjoin2.getLeft();
+ }
+ return chain;
+ }
+
+ private static List<Op> processSpecialCase1(List<Op> ops, List<Pair<Var, NodeValue>> inequalities) {
+ List<Op> ops2 = new ArrayList<Op>();
+ Collection<Var> vars = varsMentionedInInequalityFilters(inequalities);
+
+ for (Op op : ops) {
+ Op op2 = op;
+ if (safeToTransform(vars, op)) {
+ op2 = processFilterWorker(op, inequalities);
+ }
+ ops2.add(op2);
+ }
+ return ops2;
+ }
+
+ private static Op rebuild(Op2 subOp, List<Op> ops) {
+ Op chain = OpTable.unit();
+ for (Op op : ops) {
+ chain = subOp.copy(chain, op);
+ }
+ return chain;
+ }
+
+ private static boolean isUnitTable(Op op) {
+ if (op instanceof OpTable) {
+ if (((OpTable) op).isJoinIdentity())
+ return true;
+ }
+ return false;
+ }
+
+ private static Op processFilterWorker(Op op, List<Pair<Var, NodeValue>> inequalities) {
+ // Firstly find all the possible values for each variable
+ Map<Var, Set<NodeValue>> possibleValues = new HashMap<Var, Set<NodeValue>>();
+ for (Pair<Var, NodeValue> inequalityTest : inequalities) {
+ if (!possibleValues.containsKey(inequalityTest.getLeft())) {
+ possibleValues.put(inequalityTest.getLeft(), new HashSet<NodeValue>());
+ }
+ possibleValues.get(inequalityTest.getLeft()).add(inequalityTest.getRight());
+ }
+
+ // Then combine them into all possible rows to be eliminated
+ Table table = buildTable(possibleValues);
+
+ // Then apply the MINUS
+ return OpMinus.create(op, OpTable.create(table));
+ }
+
+ private static Table buildTable(Map<Var, Set<NodeValue>> possibleValues) {
+ if (possibleValues.size() == 0)
+ return TableFactory.createEmpty();
+ Table table = TableFactory.create();
+ buildRows(table, BindingFactory.create(), possibleValues);
+ return table;
+ }
+
+ private static void buildRows(Table table, BindingMap rowBase, Map<Var, Set<NodeValue>> possibleValues) {
+ if (possibleValues.size() == 0) {
+ table.addBinding(rowBase);
+ } else {
+ Var v = possibleValues.keySet().iterator().next();
+ Set<NodeValue> values = possibleValues.get(v);
+ possibleValues.remove(v);
+
+ for (NodeValue value : values) {
+ BindingMap b = BindingFactory.create(rowBase);
+ b.add(v, value.asNode());
+
+ buildRows(table, b, possibleValues);
+ }
+ }
+ }
+}
Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/AbstractTestTransform.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/AbstractTestTransform.java?rev=1555717&r1=1555716&r2=1555717&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/AbstractTestTransform.java (original)
+++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/AbstractTestTransform.java Mon Jan 6 11:23:25 2014
@@ -47,6 +47,7 @@ public abstract class AbstractTestTransf
public void testOp(String input, Transform transform, String... output) {
Op op1 = SSE.parseOp(input) ;
+ test(op1, transform, output);
}
public void test(Op op1, Transform transform, String... output) {
Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java?rev=1555717&r1=1555716&r2=1555717&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java (original)
+++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java Mon Jan 6 11:23:25 2014
@@ -27,6 +27,7 @@ import com.hp.hpl.jena.sparql.algebra.Tr
public class TestTransformFilters extends AbstractTestTransform
{
private Transform t_equality = new TransformFilterEquality() ;
+ private Transform t_inequality = new TransformFilterInequality() ;
private Transform t_disjunction = new TransformFilterDisjunction() ;
private Transform t_expandOneOf = new TransformExpandOneOf() ;
private Transform t_implicitJoin = new TransformFilterImplicitJoin() ;
@@ -905,4 +906,81 @@ public class TestTransformFilters extend
new TransformJoinStrategy(),
"(conditional (bgp (?x ?p ?o)) (assign ((?y ?x)) (bgp (?x ?p1 ?o1))))");
}
+
+ @Test public void inequality01()
+ {
+ testOp("(filter (!= ?x <x>) (bgp ( ?s ?p ?x)) )",
+ t_inequality,
+ "(minus (bgp ( ?s ?p ?x)) (table (vars ?x) (row [?x <x>])))") ;
+ }
+
+ @Test public void inequality02()
+ {
+ // Not safe on strings
+ testOp("(filter (!= ?x 'x') (bgp ( ?s ?p ?x)) )",
+ t_inequality,
+ (String[])null) ;
+ }
+
+ @Test public void inequality03()
+ {
+ // Not safe on numbers
+ testOp("(filter (!= ?x 123) (bgp ( ?s ?p ?x)) )",
+ t_inequality,
+ (String[])null) ;
+ }
+
+ @Test public void inequality04()
+ {
+ // Unused
+ testOp("(filter (!= ?UNUSED <x>) (bgp ( ?s ?p ?x)) )",
+ t_inequality,
+ "(table empty)") ;
+ }
+
+ @Test public void inequality05()
+ {
+ // Can't optimize if filter does not cover vars in LHS
+ testOp("(filter (!= ?x2 <x>) (conditional (bgp ( ?s1 ?p1 ?x1)) (bgp ( ?s2 ?p2 ?x2))))",
+ t_equality,
+ (String[])null) ;
+ }
+
+ @Test public void inequality06()
+ {
+ testOp("(filter (!= ?x <x>) (conditional (bgp ( ?s ?p ?x)) (bgp ( ?s ?p ?x))))",
+ t_inequality,
+ "(minus (conditional (bgp ( ?s ?p ?x)) (bgp ( ?s ?p ?x))) (table (vars ?x) (row [?x <x>])))") ;
+ }
+
+ @Test public void inequality07()
+ {
+ testOp("(filter (!= ?x <x>) (conditional (bgp ( ?s ?p ?x)) (bgp ( ?s ?p ?x1))))",
+ t_inequality,
+ "(minus (conditional (bgp ( ?s ?p ?x)) (bgp ( ?s ?p ?x1))) (table (vars ?x) (row [?x <x>])))") ;
+ }
+
+ @Test public void inequality08()
+ {
+ // Tests use of multiple != conditions on same variable
+ testOp("(filter ((!= ?x <x>) (!= ?x <y>)) (bgp ( ?s ?p ?x)) )",
+ t_inequality,
+ "(minus (bgp (?s ?p ?x)) (table (vars ?p ?x) (row [?x <y>]) (row [?x <x>])))") ;
+ }
+
+ @Test public void inequality09()
+ {
+ // Tests use of multiple != conditions on different variables
+ testOp("(filter ((!= ?x <x>) (!= ?p <y>)) (bgp ( ?s ?p ?x)) )",
+ t_inequality,
+ "(minus (bgp (?s ?p ?x)) (table (vars ?p ?x) (row [?p <y>] [?x <x>])))") ;
+ }
+
+ @Test public void inequality10()
+ {
+ // Tests use of multiple != conditions on both same and different variables
+ testOp("(filter ((!= ?x <x>) (!= ?x <y>) (!= ?p <type>)) (bgp ( ?s ?p ?x)) )",
+ t_inequality,
+ "(minus (bgp (?s ?p ?x)) (table (vars ?p ?x) (row [?p <type>] [?x <y>]) (row [?p <type>] [?x <x>])))") ;
+ }
}