You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2014/05/13 17:57:13 UTC

svn commit: r1594256 - in /jena/trunk/jena-arq/src: main/java/com/hp/hpl/jena/sparql/algebra/optimize/ test/java/com/hp/hpl/jena/sparql/algebra/optimize/

Author: rvesse
Date: Tue May 13 15:57:13 2014
New Revision: 1594256

URL: http://svn.apache.org/r1594256
Log:
Improvements for filter implicit join to address the bug identified as JENA-692
The transformation is now more careful about checking whether it is valid to apply over union
It is also now capable of special casing unions where one branch can be seen to always evaluate to false in advance

Add various tests cases for these changes

Modified:
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterImplicitJoin.java
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformImplicitLeftJoin.java
    jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/AbstractTestTransform.java
    jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterImplicitJoin.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterImplicitJoin.java?rev=1594256&r1=1594255&r2=1594256&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterImplicitJoin.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterImplicitJoin.java Tue May 13 15:57:13 2014
@@ -130,10 +130,10 @@ public class TransformFilterImplicitJoin
         // Special case: the deep left op of a OpConditional/OpLeftJoin is unit
         // table.
         // This is { OPTIONAL{P1} OPTIONAL{P2} ... FILTER(?x = :x) }
-        if (testSpecialCase1(subOp, joins, remaining)) {
+        if (testSpecialCaseOptional(subOp, joins, remaining)) {
             // Find backbone of ops
             List<Op> ops = extractOptionals(subOp);
-            ops = processSpecialCase1(ops, joins);
+            ops = processSpecialCaseOptional(ops, joins);
             // Put back together
             op = rebuild((Op2) subOp, ops);
             // Put all filters - either we optimized, or we left alone.
@@ -141,6 +141,16 @@ public class TransformFilterImplicitJoin
             op = OpFilter.filter(exprs, op);
             return op;
         }
+        
+        // Special case : filter is over a union where one/both sides are always false
+        if (testSpecialCaseUnion(subOp, joins)) {
+        	// This will attempt to eliminate the sides that are always false
+        	op = processSpecialCaseUnion(subOp, joins);
+        	
+        	// In the case where both sides were invalid we'll have a table empty 
+        	// operator at this point and can return immediately
+        	if (op instanceof OpTable) return op;
+        }
 
         // ---- Transform
 
@@ -176,8 +186,6 @@ public class TransformFilterImplicitJoin
     }
 
     private static Pair<Var, Var> preprocess(Op subOp, Expr e) {
-        // TODO Should also handle the case of && as TransformImplicitLeftJoin
-        // is already capable of doing
         if (!(e instanceof E_Equals) && !(e instanceof E_SameTerm))
             return null;
 
@@ -276,10 +284,20 @@ public class TransformFilterImplicitJoin
             return true;
         }
 
-        if (op instanceof OpJoin || op instanceof OpUnion) {
+        if (op instanceof OpJoin) {
             Op2 op2 = (Op2) op;
             return safeToTransform(joins, varsEquality, op2.getLeft()) && safeToTransform(joins, varsEquality, op2.getRight());
         }
+        
+        if (op instanceof OpUnion) {
+        	// True only if for any pairs that affect the pattern both variables occur
+        	Set<Var> fixedVars = OpVars.fixedVars(op);
+        	for (Pair<Var, Var> pair : joins) {
+        		if (fixedVars.contains(pair.getLeft()) && !fixedVars.contains(pair.getRight())) return false;
+        		if (!fixedVars.contains(pair.getLeft()) && fixedVars.contains(pair.getRight())) return false;
+        	}
+        	return true;
+        }
 
         // Not safe unless filter variables are mentioned on the LHS.
         if (op instanceof OpConditional || op instanceof OpLeftJoin) {
@@ -369,13 +387,29 @@ public class TransformFilterImplicitJoin
     // If a sequence of OPTIONALS, and nothing prior to the first, we end up
     // with a unit table on the left side of a next of LeftJoin/conditionals.
 
-    private static boolean testSpecialCase1(Op op, List<Pair<Var, Var>> joins, ExprList remaining) {
+    private static boolean testSpecialCaseOptional(Op op, List<Pair<Var, Var>> joins, ExprList remaining) {
         while (op instanceof OpConditional || op instanceof OpLeftJoin) {
             Op2 opleftjoin2 = (Op2) op;
             op = opleftjoin2.getLeft();
         }
         return isTableUnit(op);
     }
+    
+    private static boolean testSpecialCaseUnion(Op op, List<Pair<Var, Var>> joins) {
+    	if (op instanceof OpUnion) {
+    		OpUnion union = (OpUnion) op;
+    		Set<Var> leftVars = OpVars.visibleVars(union.getLeft());
+    		Set<Var> rightVars = OpVars.visibleVars(union.getRight());
+    		
+    		// Is a special case if there is any implicit join where only one of the variables mentioned in an
+    		// implicit join is present on one side of the union
+    		for (Pair<Var, Var> p : joins) {
+    			if (!leftVars.contains(p.getLeft()) || !leftVars.contains(p.getRight())) return true;
+    			if (!rightVars.contains(p.getLeft()) || !rightVars.contains(p.getRight())) return true;
+    		}
+    	}
+    	return false;
+    }
 
     private static List<Op> extractOptionals(Op op) {
         List<Op> chain = new ArrayList<Op>();
@@ -387,7 +421,7 @@ public class TransformFilterImplicitJoin
         return chain;
     }
 
-    private static List<Op> processSpecialCase1(List<Op> ops, List<Pair<Var, Var>> joins) {
+    private static List<Op> processSpecialCaseOptional(List<Op> ops, List<Pair<Var, Var>> joins) {
         List<Op> ops2 = new ArrayList<Op>();
         Collection<Var> vars = varsMentionedInImplictJoins(joins);
 
@@ -417,6 +451,30 @@ public class TransformFilterImplicitJoin
         }
         return false;
     }
+    
+    private static Op processSpecialCaseUnion(Op op, List<Pair<Var, Var>> joins) {
+    	if (op instanceof OpUnion) {
+    		OpUnion union = (OpUnion) op;
+    		
+    		Set<Var> leftVars = OpVars.visibleVars(union.getLeft());
+    		Set<Var> rightVars = OpVars.visibleVars(union.getRight());
+    		
+    		// Is a special case if there is any implicit join where only one of the variables mentioned in an
+    		// implicit join is present on one side of the union
+    		boolean leftEmpty = false, rightEmpty = false;
+    		for (Pair<Var, Var> p : joins) {
+    			if (leftEmpty || !leftVars.contains(p.getLeft()) || !leftVars.contains(p.getRight())) leftEmpty = true;
+    			if (rightEmpty || !rightVars.contains(p.getLeft()) || !rightVars.contains(p.getRight())) rightEmpty = true;
+    		}
+    		
+    		// If both sides of the union guarantee to produce errors then just replace the whole thing with table empty
+    		if (leftEmpty && rightEmpty) return OpTable.empty();
+    		if (leftEmpty) return union.getRight();
+    		if (rightEmpty) return union.getLeft();
+    	}
+    	// Leave untouched
+    	return op;
+    }
 
     // ---- Transformation
 

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformImplicitLeftJoin.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformImplicitLeftJoin.java?rev=1594256&r1=1594255&r2=1594256&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformImplicitLeftJoin.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformImplicitLeftJoin.java Tue May 13 15:57:13 2014
@@ -328,10 +328,20 @@ public class TransformImplicitLeftJoin e
             return true;
         }
 
-        if (op instanceof OpJoin || op instanceof OpUnion) {
+        if (op instanceof OpJoin) {
             Op2 op2 = (Op2) op;
             return safeToTransform(joins, varsEquality, op2.getLeft()) && safeToTransform(joins, varsEquality, op2.getRight());
         }
+        
+        if (op instanceof OpUnion) {
+        	// True only if for any pairs that affect the pattern both variables occur
+        	Set<Var> fixedVars = OpVars.fixedVars(op);
+        	for (Pair<Var, Var> pair : joins) {
+        		if (fixedVars.contains(pair.getLeft()) && !fixedVars.contains(pair.getRight())) return false;
+        		if (!fixedVars.contains(pair.getLeft()) && fixedVars.contains(pair.getRight())) return false;
+        	}
+        	return true;
+        }
 
         // Not safe unless filter variables are mentioned on the LHS.
         if (op instanceof OpConditional || op instanceof OpLeftJoin) {

Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/AbstractTestTransform.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/AbstractTestTransform.java?rev=1594256&r1=1594255&r2=1594256&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/AbstractTestTransform.java (original)
+++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/AbstractTestTransform.java Tue May 13 15:57:13 2014
@@ -81,12 +81,20 @@ public abstract class AbstractTestTransf
         Op opExpected = SSE.parseOp(opExpectedString) ;
         assertEquals(opExpected, opOptimize) ;
     }
+    
+    public static void checkAlgebra(String algString, Transform additionalOptimizer, String opExpectedString) {
+        Op algebra = SSE.parseOp(algString) ;
+        algebra = Algebra.optimize(algebra) ;
+        algebra = Transformer.transform(additionalOptimizer, algebra);
+        Op opExpected = SSE.parseOp(opExpectedString != null ? opExpectedString : algString);
+        assertEquals(opExpected, algebra) ;
+    }
 
-    private static void checkAlgebra(String algString, String opExpectedString) {
+    public static void checkAlgebra(String algString, String opExpectedString) {
         Op algebra = SSE.parseOp(algString) ;
         algebra = Algebra.optimize(algebra) ;
-        Op opExpexpected = SSE.parseOp(opExpectedString) ;
-        assertEquals(opExpexpected, algebra) ;
+        Op opExpected = SSE.parseOp(opExpectedString != null ? opExpectedString : algString);
+        assertEquals(opExpected, algebra) ;
     }
 
 }

Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java?rev=1594256&r1=1594255&r2=1594256&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java (original)
+++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java Tue May 13 15:57:13 2014
@@ -282,7 +282,7 @@ public class TestTransformFilters extend
               , "    ?x :p ?o2"
               , "}"
                 ) ;
-        // Transformation if ilter place not happening.
+        // Transformation if filter place not happening.
         // Weaker placement.
         String ops = StrUtils.strjoinNL
             ("(assign ((?x <http://example/x>))"
@@ -476,7 +476,7 @@ public class TestTransformFilters extend
              "(filter (exprlist (!= ?x <x>) (!= ?x 2) (!= ?x 3)) (bgp (?s ?p ?x)))") ;
     }
     
-    @Test public void implicitJoin1()
+    @Test public void implicitJoin01()
     {
         testOp(
              "(filter (= ?x ?y) (bgp (?x ?p ?o)(?y ?p1 ?o1)))",
@@ -484,7 +484,7 @@ public class TestTransformFilters extend
              "(assign ((?x ?y)) (bgp (?y ?p ?o)(?y ?p1 ?o1)))");
     }
     
-    @Test public void implicitJoin2()
+    @Test public void implicitJoin02()
     {
         testOp(
              "(filter (= ?x ?y) (bgp (?x ?p ?o)))",
@@ -492,7 +492,7 @@ public class TestTransformFilters extend
              "(table empty)");
     }
     
-    @Test public void implicitJoin3()
+    @Test public void implicitJoin03()
     {
         // Still safe to transform as at least one is guaranteed non-literal
         testOp(
@@ -501,7 +501,7 @@ public class TestTransformFilters extend
              "(assign ((?x ?y)) (bgp (?y ?p ?o)(?a ?b ?y)))");
     }
     
-    @Test public void implicitJoin4()
+    @Test public void implicitJoin04()
     {
         // Not safe to transform as both may be literals
         testOp(
@@ -510,7 +510,7 @@ public class TestTransformFilters extend
              "(filter (= ?x ?y) (bgp (?a ?b ?x)(?c ?d ?y)))");
     }
     
-    @Test public void implicitJoin5()
+    @Test public void implicitJoin05()
     {
         // Safe to transform as although both may be literals we are using sameTerm so already relying on term equality
         testOp(
@@ -519,7 +519,7 @@ public class TestTransformFilters extend
              "(assign ((?x ?y)) (bgp (?a ?b ?y)(?c ?d ?y)))");
     }
     
-    @Test public void implicitJoin6()
+    @Test public void implicitJoin06()
     {
         // Not safe to transform as equality on same variable
         testOp(
@@ -528,7 +528,7 @@ public class TestTransformFilters extend
              (String[])null);
     }
     
-    @Test public void implicitJoin7()
+    @Test public void implicitJoin07()
     {
         testOp(
              "(filter ((= ?x ?y) (= ?x ?z)) (bgp (?x ?p ?o)(?y ?p1 ?z)))",
@@ -536,7 +536,7 @@ public class TestTransformFilters extend
              (String[])null);
     }
     
-    @Test public void implicitJoin8()
+    @Test public void implicitJoin08()
     {
         testOp(
              "(filter (= ?x ?y) (join (bgp (?x ?p ?o)) (bgp (?y ?p1 ?o1))))",
@@ -549,7 +549,7 @@ public class TestTransformFilters extend
                 "(assign ((?y ?x)) (join (bgp (?x ?p ?o)) (bgp (?x ?p1 ?o1))))");
     }
     
-    @Test public void implicitJoin9()
+    @Test public void implicitJoin09()
     {
         // A variable introduced by an assign cannot be considered safe
         testOp(
@@ -578,8 +578,71 @@ public class TestTransformFilters extend
             t_implicitJoin,
             "(table empty)");
     }
+    
+    @Test public void implicitJoin12()
+    {
+    	// Test case from JENA-692
+    	// Implicit join should not apply to the whole union because the variables involved aren't fixed
+    	// However we can spot the special case of one branch of the union being invalid and throw it out
+    	testOp(
+			"(filter (= ?a ?b) (union (bgp (triple ?a :p :o1)) (bgp (triple ?b :p ?a))))", 
+			t_implicitJoin,
+			"(assign ((?a ?b)) (bgp (triple ?b :p ?b)))");
+    }
+    
+    @Test public void implicitJoin13()
+    {
+    	// Variation on test case from JENA-692
+    	// Implicit join should not apply to the whole union because the variables involved aren't fixed
+    	// However we can spot the special case of one branch of the union being invalid and throw it out
+    	testOp(
+			"(filter (= ?a ?b) (union (bgp (triple ?b :p ?a)) (bgp (triple ?a :p :o1))))", 
+			t_implicitJoin,
+			"(assign ((?a ?b)) (bgp (triple ?b :p ?b)))");
+    }
+    
+    @Test public void implicitJoin14()
+    {
+    	// Variation on test case from JENA-692
+    	// Implicit join should not apply to the whole union because the variables involved aren't fixed
+    	// However we can spot the special case where both branches are invalid
+    	testOp(
+			"(filter (= ?a ?b) (union (bgp (triple ?b :p :o1)) (bgp (triple ?a :p :o2))))", 
+			t_implicitJoin,
+			"(table empty)");
+    }
         
-    @Test public void implicitLeftJoin1()
+    @Test public void implicitJoin15()
+    {
+    	// Variation on the test case from JENA-692 with additional assignment added
+    	// Implicit join should not apply to the whole union because not all assignments are valid
+    	testOp(
+			"(filter ((= ?a ?b) (= ?a ?p)) (union (bgp (triple ?a ?p :o1)) (bgp (triple ?b ?p ?a))))", 
+			t_implicitJoin,
+			(String[])null);
+    }
+    
+    @Test public void implicitJoin16()
+    {
+    	// Variation on test case from JENA-692
+    	// Implicit join can apply because assignments are valid over both branches of the union
+    	testOp(
+			"(filter (= ?a ?b) (union (bgp (triple ?a :p ?b)) (bgp (triple ?b :p ?a))))", 
+			t_implicitJoin,
+			"(assign ((?a ?b)) (union (bgp (triple ?b :p ?b)) (bgp (triple ?b :p ?b))))");
+    }
+    
+    @Test public void implicitJoin17()
+    {
+    	// Variation on test case from JENA-692
+    	// Implicit join can apply because the filter is over the branch of the union where it is valid
+    	testOp(
+			"(union (bgp (triple ?a :p :o1)) (filter (= ?a ?b) (bgp (triple ?b :p ?a))))", 
+			t_implicitJoin,
+			"(union (bgp (triple ?a :p :o1)) (assign ((?a ?b)) (bgp (triple ?b :p ?b))))");
+    }
+        
+    @Test public void implicitLeftJoin01()
     {
         // Possible to optimize some cases where it's an implicit left join
         
@@ -596,7 +659,7 @@ public class TestTransformFilters extend
                 "(leftjoin (bgp (?x ?p ?o)) (assign ((?y ?x)) (bgp (?x ?p1 ?o1))))");
     }
     
-    @Test public void implicitLeftJoin2()
+    @Test public void implicitLeftJoin02()
     {
         // Possible to optimize some cases where it's an implicit left join
         
@@ -613,7 +676,7 @@ public class TestTransformFilters extend
                 "(leftjoin (bgp (?y ?p ?o)) (assign ((?x ?y)) (bgp (?y ?p1 ?o1))))");
     }
     
-    @Test public void implicitLeftJoin3()
+    @Test public void implicitLeftJoin03()
     {
         // Possible to optimize some cases where it's an implicit left join
         
@@ -630,7 +693,7 @@ public class TestTransformFilters extend
                 "(leftjoin (bgp (?x ?p ?o)) (assign ((?y ?x)) (bgp (?x <http://type> ?type)(?x ?p1 ?o1))))");
     }
     
-    @Test public void implicitLeftJoin4()
+    @Test public void implicitLeftJoin04()
     {
         // Possible to optimize some cases where it's an implicit left join
         
@@ -647,7 +710,7 @@ public class TestTransformFilters extend
                 "(leftjoin (bgp (?y ?p ?o)) (assign ((?x ?y)) (bgp (?y <http://type> ?type)(?y ?p1 ?o1))))");
     }
     
-    @Test public void implicitLeftJoin5()
+    @Test public void implicitLeftJoin05()
     {
         // Possible to optimize some cases where it's an implicit left join
         
@@ -664,7 +727,7 @@ public class TestTransformFilters extend
                 "(leftjoin (bgp (?x ?p ?o)(?x <http://pred> ?y)) (assign ((?y ?x)) (bgp (?x ?p1 ?o1))))");
     }
     
-    @Test public void implicitLeftJoin6()
+    @Test public void implicitLeftJoin06()
     {
         // Possible to optimize some cases where it's an implicit left join
         
@@ -681,7 +744,7 @@ public class TestTransformFilters extend
                 "(leftjoin (bgp (?x ?p ?o)(?x <http://pred> ?y)) (assign ((?x ?y)) (bgp (?y ?p1 ?o1))))");
     }
     
-    @Test public void implicitLeftJoin7()
+    @Test public void implicitLeftJoin07()
     {
         // Possible to optimize some cases where it's an implicit left join
         
@@ -698,7 +761,7 @@ public class TestTransformFilters extend
                 "(leftjoin (bgp (?x ?p ?o)(?x <http://pred> ?y)) (assign ((?y ?x)) (bgp (?x <http://type> ?type)(?x ?p1 ?o1))))");
     }
     
-    @Test public void implicitLeftJoin8()
+    @Test public void implicitLeftJoin08()
     {
         // We don't currently optimize the case where the filter will evaluate to false
         // for all solutions because neither variable in on the RHS
@@ -708,7 +771,7 @@ public class TestTransformFilters extend
              (String[])null);
     }
         
-    @Test public void implicitLeftJoin9()
+    @Test public void implicitLeftJoin09()
     {
         // && means both conditions must hold so can optimize out the implicit join
         testOp(
@@ -911,7 +974,17 @@ public class TestTransformFilters extend
              "(leftjoin (table unit) (assign ((?y ?x)) (bgp (?x ?p ?o)(?x <http://pred> ?x))))");
     }
     
-    @Test public void implicitLeftJoinConditional1()
+    @Test public void implicitLeftJoin24()
+    {
+    	// Modified test case from JENA-692
+    	// Implicit join should not apply to the whole union because the variables involved aren't fixed
+    	testOp(
+			"(leftjoin (bgp (triple ?a <http://type> ?type)) (union (bgp (triple ?a :p :o1)) (bgp (triple ?b :p ?a))) ((= ?a ?b)))", 
+			t_implicitLeftJoin,
+			(String[])null);
+    }
+    
+    @Test public void implicitLeftJoinConditional01()
     {
         // Can be optimized because not all assigns block linearization
         testOp(