You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/09/09 11:24:08 UTC

[3/5] jena git commit: Single variable hash keys (for now).

Single variable hash keys (for now).

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/ee103d9b
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/ee103d9b
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/ee103d9b

Branch: refs/heads/master
Commit: ee103d9b906e1c9bebf13c612807699a09cf87c8
Parents: 20f17a1
Author: Andy Seaborne <an...@apache.org>
Authored: Wed Sep 9 09:44:58 2015 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Wed Sep 9 09:44:58 2015 +0100

----------------------------------------------------------------------
 .../apache/jena/sparql/engine/join/JoinKey.java | 26 ++++++++++++++------
 .../sparql/engine/join/QueryIterHashJoin.java   |  6 ++++-
 .../engine/join/AbstractTestInnerJoin.java      | 16 +++++++++++-
 .../sparql/engine/join/AbstractTestJoin.java    | 26 ++++++++++++++++++++
 .../engine/join/AbstractTestLeftJoin.java       |  1 -
 5 files changed, 65 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java
index a4a067d..0fd47b6 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java
@@ -28,22 +28,35 @@ import org.apache.jena.sparql.core.Var ;
 /** JoinKey for hash joins */
 public final class JoinKey implements Iterable<Var>
 {
+    private static final JoinKey emptyKey = new JoinKey(DS.listOfNone()) ;
+
     // Common way to make a JoinKey
     /** Make a JoinKey from the intersection of two sets **/  
-    
     public static JoinKey create(Collection<Var> vars1, Collection<Var> vars2) {
         // JoinKeys are generally small so short loops are best.
         // vars2 may be smallest e.g. from triple and running accumulator (vars1) 
         List<Var> intersection = DS.list() ;
         for ( Var v : vars1 ) {
             if ( vars2.contains(v) )
+                intersection.add(v) ;  
+        }
+        return new JoinKey(intersection) ;
+    }
+    
+    /** Make a single-variable JoinKey from the first variable of vars1 that also occurs in vars2; the empty key if there is none. **/
+    public static JoinKey createVarKey(Collection<Var> vars1, Collection<Var> vars2) {
+        // JoinKeys are generally small so short loops are best.
+        // vars2 may be smallest e.g. from triple and running accumulator (vars1) 
+        List<Var> intersection = DS.list() ;
+        for ( Var v : vars1 ) {
+            if ( vars2.contains(v) )
                 // First and single key.
                 return create(v) ;
                 // Compound keys needs validation : what if they are partial
                 // i.e. some rows only have part of the join key?
                 //intersection.add(v) ;  
         }
-        return new JoinKey(intersection) ;
+        return emptyKey ;
     }
     
     public static JoinKey create(Var var) {
@@ -87,9 +100,11 @@ public final class JoinKey implements Iterable<Var>
     
     private JoinKey(List<Var> _keys) { keys = _keys ; }
     
-    private JoinKey(Var var)     { keys = DS.listOfOne(var) ; }
+    private JoinKey(Var var)        { keys = DS.listOfOne(var) ; }
     
-    public boolean isEmpty()    { return keys.isEmpty() ; }
+    public boolean isEmpty()        { return keys.isEmpty() ; }
+    
+    public int length()             { return keys.size() ; }
 
     /** Get a single variable for this key. 
      *  For any one key, it always returns the same var */ 
@@ -107,6 +122,3 @@ public final class JoinKey implements Iterable<Var>
         return keys.toString() ;
     }
 }
-
-
-

http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java
index 1b4b29a..98e779d 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java
@@ -22,6 +22,7 @@ import java.util.Iterator ;
 import java.util.List ;
 
 import org.apache.jena.atlas.iterator.Iter ;
+import org.apache.jena.atlas.logging.Log ;
 import org.apache.jena.sparql.algebra.Algebra ;
 import org.apache.jena.sparql.core.Var ;
 import org.apache.jena.sparql.engine.ExecutionContext ;
@@ -69,6 +70,9 @@ public class QueryIterHashJoin extends QueryIter2 {
             right.close() ;
             return QueryIterNullIterator.create(execCxt) ;
         }
+        if ( joinKey != null && joinKey.length() > 1 )
+            Log.warn(QueryIterHashJoin.class, "Multivariable join key") ; 
+        
         return new QueryIterHashJoin(joinKey, left, right, execCxt) ; 
     }
     
@@ -96,7 +100,7 @@ public class QueryIterHashJoin extends QueryIter2 {
             
             List<Var> varsLeft = Iter.toList(bLeft.vars()) ;
             List<Var> varsRight = Iter.toList(bRight.vars()) ;
-            joinKey = JoinKey.create(varsLeft, varsRight) ;
+            joinKey = JoinKey.createVarKey(varsLeft, varsRight) ;
             left = pLeft ;
             right = pRight ;
         }

http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java
index 0242d4a..5152e4e 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java
@@ -19,6 +19,7 @@
 package org.apache.jena.sparql.engine.join;
 
 import org.apache.jena.sparql.algebra.Table ;
+import org.apache.jena.sparql.core.Var ;
 import org.apache.jena.sparql.expr.ExprList ;
 import org.junit.Test ;
 
@@ -67,10 +68,23 @@ public abstract class AbstractTestInnerJoin extends AbstractTestJoin {
     // No key.
     @Test public void join_14() { testJoin(null, tableD1(), tableD2(), tableD3()) ; }
 
-
+    @Test public void join_skew_01() { testJoin("x", tableS1(), tableS2(), tableS1J2()) ; }
+    @Test public void join_skew_02() { testJoin("w", tableS1(), tableS2(), tableS1J2()) ; }
+    @Test public void join_skew_03() { testJoin(null, tableS1(), tableS2(), tableS1J2()) ; }
+    //@Test
+    // Disabled: multi-variable join keys don't work on skew data yet.
+    public void join_skew_04() { 
+        JoinKey joinKey = new JoinKey.Builder()
+            .add(Var.alloc("x"))
+            .add(Var.alloc("w"))
+            .build() ;
+        testJoinWithKey(joinKey, tableS1(), tableS2(), tableS1J2()) ; 
+    }
+    
     // Disjoint tables.
     @Test public void join_disjoint_01() { testJoin("a", tableD2(), tableD8(), tableD8x2()) ; }
     @Test public void join_disjoint_02() { testJoin("z", tableD2(), tableD8(), tableD8x2()) ; }
+    @Test public void join_disjoint_03() { testJoin(null, tableD2(), tableD8(), tableD8x2()) ; }
 }
 
 

http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java
index 434af09..dd16393 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java
@@ -216,6 +216,24 @@ public abstract class AbstractTestJoin extends Assert {
             ")") ;
     }
     
+    // Skew tables for join testing: not every row binds every join variable.
+    // Intended join keys: ?x alone, ?w alone, and the compound key [?x, ?w].
+    
+    protected static Table tableS1() {
+        return parseTableInt("(table"
+                             ,"  (row (?z <http://example/z1>) (?x <http://example/x>) (?w 'w11-1'))"
+                             ,"  (row (?z <http://example/z4>) (?x <http://example/x>)))"
+                            ); }
+    protected static Table tableS2() {
+        return parseTableInt("(table (row (?x <http://example/x>) (?w <http://example/z1>)))") ;
+    }
+    
+    protected static Table tableS1J2() {
+        return parseTableInt("(table" 
+                             ,"  (row (?z <http://example/z4>) (?x <http://example/x>) (?w <http://example/z1>) ))" 
+                            ); 
+    }
+    
     // Code
 
     protected static Table parseTableInt(String... strings) {
@@ -244,6 +262,14 @@ public abstract class AbstractTestJoin extends Assert {
         executeTest(joinKey, left, right, null, tableOut) ;
     }
 
+    protected void testJoinWithKey(JoinKey joinKey, Table left, Table right, Table tableOut) {
+        executeTest(joinKey, left, right, null, tableOut) ;
+    }
+
+    protected void testJoinWithKey(JoinKey joinKey, Table left, Table right, ExprList conditions, Table tableOut) {
+        executeTest(joinKey, left, right, conditions, tableOut) ;
+    }
+
     // Any kind of join (choose by abstract join() operation).
     protected abstract void executeTest(JoinKey joinKey, Table left, Table right, ExprList conditions, Table expectedResults) ;
     

http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java
index 2786bed..afca7db 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java
@@ -66,7 +66,6 @@ public abstract class AbstractTestLeftJoin extends AbstractTestJoin {
     @Test public void leftjoin_J13()        { testJoin("z", tableD2(), tableD1(), tableD3_LJ()) ; }
 
     // No key.
-    
     @Test public void leftjoin_14()         { testJoin(null, tableD1(), tableD2(), tableD3()) ; }