You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/09/09 11:24:08 UTC
[3/5] jena git commit: Single variable hash keys (for now).
Single variable hash keys (for now).
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/ee103d9b
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/ee103d9b
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/ee103d9b
Branch: refs/heads/master
Commit: ee103d9b906e1c9bebf13c612807699a09cf87c8
Parents: 20f17a1
Author: Andy Seaborne <an...@apache.org>
Authored: Wed Sep 9 09:44:58 2015 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Wed Sep 9 09:44:58 2015 +0100
----------------------------------------------------------------------
.../apache/jena/sparql/engine/join/JoinKey.java | 26 ++++++++++++++------
.../sparql/engine/join/QueryIterHashJoin.java | 6 ++++-
.../engine/join/AbstractTestInnerJoin.java | 16 +++++++++++-
.../sparql/engine/join/AbstractTestJoin.java | 26 ++++++++++++++++++++
.../engine/join/AbstractTestLeftJoin.java | 1 -
5 files changed, 65 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java
index a4a067d..0fd47b6 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java
@@ -28,22 +28,35 @@ import org.apache.jena.sparql.core.Var ;
/** JoinKey for hash joins */
public final class JoinKey implements Iterable<Var>
{
+ private static final JoinKey emptyKey = new JoinKey(DS.listOfNone()) ;
+
// Common way to make a JoinKey
/** Make a JoinKey from the intersection of two sets **/
-
public static JoinKey create(Collection<Var> vars1, Collection<Var> vars2) {
// JoinKeys are generally small so short loops are best.
// vars2 may be smallest e.g. from triple and running accumulator (vars1)
List<Var> intersection = DS.list() ;
for ( Var v : vars1 ) {
if ( vars2.contains(v) )
+ intersection.add(v) ;
+ }
+ return new JoinKey(intersection) ;
+ }
+
+ /** Make a JoinKey of a single variable from the intersection of two sets **/
+ public static JoinKey createVarKey(Collection<Var> vars1, Collection<Var> vars2) {
+ // JoinKeys are generally small so short loops are best.
+ // vars2 may be smallest e.g. from triple and running accumulator (vars1)
+ List<Var> intersection = DS.list() ;
+ for ( Var v : vars1 ) {
+ if ( vars2.contains(v) )
// First and single key.
return create(v) ;
// Compound keys need validation : what if they are partial
// i.e. some rows only have part of the join key?
//intersection.add(v) ;
}
- return new JoinKey(intersection) ;
+ return emptyKey ;
}
public static JoinKey create(Var var) {
@@ -87,9 +100,11 @@ public final class JoinKey implements Iterable<Var>
private JoinKey(List<Var> _keys) { keys = _keys ; }
- private JoinKey(Var var) { keys = DS.listOfOne(var) ; }
+ private JoinKey(Var var) { keys = DS.listOfOne(var) ; }
- public boolean isEmpty() { return keys.isEmpty() ; }
+ public boolean isEmpty() { return keys.isEmpty() ; }
+
+ public int length() { return keys.size() ; }
/** Get a single variable for this key.
* For any one key, it always returns the same var */
@@ -107,6 +122,3 @@ public final class JoinKey implements Iterable<Var>
return keys.toString() ;
}
}
-
-
-
http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java
index 1b4b29a..98e779d 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java
@@ -22,6 +22,7 @@ import java.util.Iterator ;
import java.util.List ;
import org.apache.jena.atlas.iterator.Iter ;
+import org.apache.jena.atlas.logging.Log ;
import org.apache.jena.sparql.algebra.Algebra ;
import org.apache.jena.sparql.core.Var ;
import org.apache.jena.sparql.engine.ExecutionContext ;
@@ -69,6 +70,9 @@ public class QueryIterHashJoin extends QueryIter2 {
right.close() ;
return QueryIterNullIterator.create(execCxt) ;
}
+ if ( joinKey != null && joinKey.length() > 1 )
+ Log.warn(QueryIterHashJoin.class, "Multivariable join key") ;
+
return new QueryIterHashJoin(joinKey, left, right, execCxt) ;
}
@@ -96,7 +100,7 @@ public class QueryIterHashJoin extends QueryIter2 {
List<Var> varsLeft = Iter.toList(bLeft.vars()) ;
List<Var> varsRight = Iter.toList(bRight.vars()) ;
- joinKey = JoinKey.create(varsLeft, varsRight) ;
+ joinKey = JoinKey.createVarKey(varsLeft, varsRight) ;
left = pLeft ;
right = pRight ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java
index 0242d4a..5152e4e 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java
@@ -19,6 +19,7 @@
package org.apache.jena.sparql.engine.join;
import org.apache.jena.sparql.algebra.Table ;
+import org.apache.jena.sparql.core.Var ;
import org.apache.jena.sparql.expr.ExprList ;
import org.junit.Test ;
@@ -67,10 +68,23 @@ public abstract class AbstractTestInnerJoin extends AbstractTestJoin {
// No key.
@Test public void join_14() { testJoin(null, tableD1(), tableD2(), tableD3()) ; }
-
+ @Test public void join_skew_01() { testJoin("x", tableS1(), tableS2(), tableS1J2()) ; }
+ @Test public void join_skew_02() { testJoin("w", tableS1(), tableS2(), tableS1J2()) ; }
+ @Test public void join_skew_03() { testJoin(null, tableS1(), tableS2(), tableS1J2()) ; }
+ //@Test
+ // Multiple variable join keys on skew data don't work.
+ public void join_skew_04() {
+ JoinKey joinKey = new JoinKey.Builder()
+ .add(Var.alloc("x"))
+ .add(Var.alloc("w"))
+ .build() ;
+ testJoinWithKey(joinKey, tableS1(), tableS2(), tableS1J2()) ;
+ }
+
// Disjoint tables.
@Test public void join_disjoint_01() { testJoin("a", tableD2(), tableD8(), tableD8x2()) ; }
@Test public void join_disjoint_02() { testJoin("z", tableD2(), tableD8(), tableD8x2()) ; }
+ @Test public void join_disjoint_03() { testJoin(null, tableD2(), tableD8(), tableD8x2()) ; }
}
http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java
index 434af09..dd16393 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java
@@ -216,6 +216,24 @@ public abstract class AbstractTestJoin extends Assert {
")") ;
}
+ // Skew tables for join testing.
+ // Join keys of ?x ?w and [?x , ?w]
+
+ protected static Table tableS1() {
+ return parseTableInt("(table"
+ ," (row (?z <http://example/z1>) (?x <http://example/x>) (?w 'w11-1'))"
+ ," (row (?z <http://example/z4>) (?x <http://example/x>)))"
+ ); }
+ protected static Table tableS2() {
+ return parseTableInt("(table (row (?x <http://example/x>) (?w <http://example/z1>)))") ;
+ }
+
+ protected static Table tableS1J2() {
+ return parseTableInt("(table"
+ ," (row (?z <http://example/z4>) (?x <http://example/x>) (?w <http://example/z1>) ))"
+ );
+ }
+
// Code
protected static Table parseTableInt(String... strings) {
@@ -244,6 +262,14 @@ public abstract class AbstractTestJoin extends Assert {
executeTest(joinKey, left, right, null, tableOut) ;
}
+ protected void testJoinWithKey(JoinKey joinKey, Table left, Table right, Table tableOut) {
+ executeTest(joinKey, left, right, null, tableOut) ;
+ }
+
+ protected void testJoinWithKey(JoinKey joinKey, Table left, Table right, ExprList conditions, Table tableOut) {
+ executeTest(joinKey, left, right, conditions, tableOut) ;
+ }
+
// Any kind of join (choose by abstract join() operation).
protected abstract void executeTest(JoinKey joinKey, Table left, Table right, ExprList conditions, Table expectedResults) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java
index 2786bed..afca7db 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java
@@ -66,7 +66,6 @@ public abstract class AbstractTestLeftJoin extends AbstractTestJoin {
@Test public void leftjoin_J13() { testJoin("z", tableD2(), tableD1(), tableD3_LJ()) ; }
// No key.
-
@Test public void leftjoin_14() { testJoin(null, tableD1(), tableD2(), tableD3()) ; }