You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2012/06/21 18:17:19 UTC
svn commit: r1352595 - in
/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql: algebra/index/
engine/iterator/ engine/main/
Author: rvesse
Date: Thu Jun 21 16:17:18 2012
New Revision: 1352595
URL: http://svn.apache.org/viewvc?rev=1352595&view=rev
Log:
Applied Paul Gearon's patch for MINUS evaluation improvements (JENA-266)
Added:
jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/
jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/HashIndexTable.java
jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexFactory.java
jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexTable.java
jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/LinearIndex.java
Modified:
jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterMinus.java
jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java
Added: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/HashIndexTable.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/HashIndexTable.java?rev=1352595&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/HashIndexTable.java (added)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/HashIndexTable.java Thu Jun 21 16:17:18 2012
@@ -0,0 +1,213 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hp.hpl.jena.sparql.algebra.index;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.sparql.core.Var;
+import com.hp.hpl.jena.sparql.engine.QueryIterator;
+import com.hp.hpl.jena.sparql.engine.binding.Binding;
+
+public class HashIndexTable implements IndexTable {
+
+ final private Set<Key> table ;
+ private Map<Var,Integer> varColumns ;
+ private boolean missingValue ;
+
+ public HashIndexTable(Set<Var> commonVars, QueryIterator data) throws MissingBindingException
+ {
+ initColumnMappings(commonVars) ;
+ if ( commonVars.size() == 0 )
+ {
+ table = null ;
+ return ;
+ }
+
+ table = new HashSet<Key>() ;
+ missingValue = false ;
+
+ while ( data.hasNext() )
+ {
+ Binding binding = data.nextBinding() ;
+ addBindingToTable(binding) ;
+ }
+ data.close() ;
+ }
+
+ @Override
+ public boolean containsCompatibleWithSharedDomain(Binding binding)
+ {
+ // no shared variables means no shared domain, and should be ignored
+ if ( table == null )
+ return false ;
+
+ Key indexKey ;
+ indexKey = convertToKey(binding) ;
+
+ if ( table.contains(indexKey) )
+ return true ;
+
+ if ( anyUnbound(indexKey) )
+ return exhaustiveSearch(indexKey) ;
+ return false ;
+ }
+
+ private boolean anyUnbound(Key mappedBinding)
+ {
+ for ( Node n: mappedBinding.getNodes() )
+ {
+ if ( n == null )
+ return true ;
+ }
+ return false ;
+ }
+
+ private void initColumnMappings(Set<Var> commonVars)
+ {
+ varColumns = new HashMap<Var,Integer>() ;
+ int c = 0 ;
+ for ( Var var: commonVars )
+ varColumns.put(var, c++) ;
+ }
+
+ private void addBindingToTable(Binding binding) throws MissingBindingException
+ {
+ Key key = convertToKey(binding) ;
+ table.add(key) ;
+ if ( missingValue )
+ throw new MissingBindingException(table, varColumns) ;
+ }
+
+ private Key convertToKey(Binding binding)
+ {
+ Node[] indexKey = new Node[varColumns.size()] ;
+
+ for ( Map.Entry<Var,Integer> varCol : varColumns.entrySet() )
+ {
+ Node value = binding.get(varCol.getKey()) ;
+ if ( value == null )
+ missingValue = true ;
+ indexKey[varCol.getValue()] = value ;
+ }
+ return new Key(indexKey) ;
+ }
+
+ private boolean exhaustiveSearch(Key mappedBindingLeft)
+ {
+ for ( Key mappedBindingRight: table )
+ {
+ if ( mappedBindingLeft.compatibleAndSharedDomain(mappedBindingRight) )
+ return true ;
+ }
+ return false ;
+ }
+
+ static class MissingBindingException extends Exception {
+ private final Set<Key> data ;
+ private final Map<Var,Integer> varMappings ;
+
+ public MissingBindingException(Set<Key> data, Map<Var,Integer> varMappings)
+ {
+ this.data = data ;
+ this.varMappings = varMappings ;
+ }
+
+ public Set<Key> getData() { return data ; }
+ public Map<Var,Integer> getMap() { return varMappings ; }
+ }
+
+ static class Key
+ {
+ final Node[] nodes;
+
+ Key(Node[] nodes)
+ {
+ this.nodes = nodes ;
+ }
+
+ public Node[] getNodes()
+ {
+ return nodes;
+ }
+
+ @Override
+ public String toString()
+ {
+ return Arrays.asList(nodes).toString() ;
+ }
+
+ @Override
+ public int hashCode()
+ {
+ int result = 0 ;
+ for ( Node n: nodes )
+ result ^= (n == null) ? 0 : n.hashCode() ;
+ return result ;
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if ( ! (o instanceof Key) )
+ return false ;
+ Node[] other = ((Key)o).nodes ;
+
+ for ( int i = 0 ; i < nodes.length ; i++ )
+ {
+ if ( nodes[i] == null)
+ {
+ if ( other[i] != null )
+ return false ;
+ }
+ else
+ {
+ if ( ! nodes[i].equals(other[i]) )
+ return false ;
+ }
+ }
+ return true ;
+ }
+
+ public boolean compatibleAndSharedDomain(Key mappedBindingR)
+ {
+ Node[] nodesRight = mappedBindingR.getNodes() ;
+
+ boolean sharedDomain = false ;
+ for ( int c = 0 ; c < nodes.length ; c++ )
+ {
+ Node nLeft = nodes[c] ;
+ Node nRight = nodesRight[c] ;
+
+ if ( nLeft != null && nRight != null )
+ {
+ if ( nLeft.equals(nRight) )
+ return false ;
+ sharedDomain = true ;
+ }
+ }
+ return sharedDomain ;
+ }
+ }
+}
+
Added: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexFactory.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexFactory.java?rev=1352595&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexFactory.java (added)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexFactory.java Thu Jun 21 16:17:18 2012
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hp.hpl.jena.sparql.algebra.index;
+
+import java.util.Set;
+
+import com.hp.hpl.jena.sparql.algebra.index.HashIndexTable.MissingBindingException;
+import com.hp.hpl.jena.sparql.core.Var;
+import com.hp.hpl.jena.sparql.engine.QueryIterator;
+
+public class IndexFactory {
+
+ public static IndexTable createIndex(Set<Var> commonVars, QueryIterator data)
+ {
+ try {
+ return new HashIndexTable(commonVars, data) ;
+ } catch (MissingBindingException e) {
+ return new LinearIndex(commonVars, data, e.getData(), e.getMap()) ;
+ }
+ }
+}
+
Added: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexTable.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexTable.java?rev=1352595&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexTable.java (added)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexTable.java Thu Jun 21 16:17:18 2012
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hp.hpl.jena.sparql.algebra.index;
+
+import com.hp.hpl.jena.sparql.engine.binding.Binding;
+
+public interface IndexTable {
+
+ public abstract boolean containsCompatibleWithSharedDomain(Binding binding);
+
+}
Added: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/LinearIndex.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/LinearIndex.java?rev=1352595&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/LinearIndex.java (added)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/LinearIndex.java Thu Jun 21 16:17:18 2012
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hp.hpl.jena.sparql.algebra.index;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.sparql.algebra.Algebra;
+import com.hp.hpl.jena.sparql.core.Var;
+import com.hp.hpl.jena.sparql.engine.QueryIterator;
+import com.hp.hpl.jena.sparql.engine.binding.Binding;
+import com.hp.hpl.jena.sparql.engine.binding.BindingHashMap;
+
+public class LinearIndex implements IndexTable {
+
+ final Set<Var> commonVars ;
+ List<Binding> table = new ArrayList<Binding>() ;
+
+ public LinearIndex(Set<Var> commonVars, QueryIterator data)
+ {
+ this.commonVars = commonVars ;
+ while ( data.hasNext() )
+ table.add(data.next()) ;
+ data.close() ;
+ }
+
+ public LinearIndex(Set<Var> commonVars, QueryIterator data, Set<HashIndexTable.Key> loadedData, Map<Var,Integer> mappings)
+ {
+ this.commonVars = commonVars ;
+ for ( HashIndexTable.Key key: loadedData )
+ table.add(toBinding(key, mappings)) ;
+
+ while ( data.hasNext() )
+ table.add(data.next()) ;
+ data.close() ;
+ }
+
+ @Override
+ public boolean containsCompatibleWithSharedDomain(Binding bindingLeft)
+ {
+ if ( commonVars.size() == 0 )
+ return false ;
+
+ for ( Binding bindingRight: table )
+ {
+ if ( hasCommonVars(bindingLeft, bindingRight)
+ && Algebra.compatible(bindingLeft, bindingRight) )
+ return true ;
+ }
+ return false ;
+ }
+
+ private boolean hasCommonVars(Binding left, Binding right)
+ {
+ for ( Var v: commonVars )
+ {
+ if ( left.contains(v) && right.contains(v) )
+ return true ;
+ }
+ return false;
+ }
+
+ private static Binding toBinding(HashIndexTable.Key key, Map<Var,Integer> mappings)
+ {
+ Node[] values = key.getNodes() ;
+ BindingHashMap b = new BindingHashMap() ;
+ for (Map.Entry<Var,Integer> mapping: mappings.entrySet())
+ {
+ Node value = values[mapping.getValue()] ;
+ if ( value != null )
+ b.add(mapping.getKey(), value) ;
+ }
+ return b ;
+ }
+}
+
Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterMinus.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterMinus.java?rev=1352595&r1=1352594&r2=1352595&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterMinus.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterMinus.java Thu Jun 21 16:17:18 2012
@@ -18,57 +18,72 @@
package com.hp.hpl.jena.sparql.engine.iterator;
-import java.util.Iterator ;
+import java.util.Map;
import java.util.Set ;
-import org.openjena.atlas.iterator.Iter ;
-
-import com.hp.hpl.jena.sparql.algebra.Algebra ;
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.sparql.algebra.index.IndexFactory;
+import com.hp.hpl.jena.sparql.algebra.index.IndexTable;
import com.hp.hpl.jena.sparql.core.Var ;
import com.hp.hpl.jena.sparql.engine.ExecutionContext ;
import com.hp.hpl.jena.sparql.engine.QueryIterator ;
import com.hp.hpl.jena.sparql.engine.binding.Binding ;
/** Minus by materializing the RHS - this is not streamed on the right */
-public class QueryIterMinus extends QueryIter2LoopOnLeft
+public class QueryIterMinus extends QueryIter2
{
- public QueryIterMinus(QueryIterator left, QueryIterator right, ExecutionContext qCxt)
+ private IndexTable tableRight;
+ private Map<Var, Integer> varColumns ;
+ private Set<Node[]> rightTable;
+ Binding slot = null ;
+
+ public QueryIterMinus(QueryIterator left, QueryIterator right, Set<Var> commonVars, ExecutionContext qCxt)
{
super(left, right, qCxt) ;
+ tableRight = IndexFactory.createIndex(commonVars, right) ;
}
- @Override
protected Binding getNextSlot(Binding bindingLeft)
{
- boolean accept = true ;
- Set<Var> varsLeft = Iter.toSet(bindingLeft.vars()) ;
+ if ( tableRight.containsCompatibleWithSharedDomain(bindingLeft) )
+ return null ;
+
+ return bindingLeft;
+ }
- for ( Iterator<Binding> iter = tableRight.iterator(null) ; iter.hasNext() ; )
+ @Override
+ protected final void closeSubIterator() { }
+
+ @Override
+ protected void requestSubCancel() { }
+
+ @Override
+ protected final boolean hasNextBinding()
+ {
+ if ( slot != null )
+ return true ;
+
+ while ( getLeft().hasNext() )
{
- Binding bindingRight = iter.next() ;
-
- if ( ! commonVariable(varsLeft, bindingRight) )
- continue ;
- if ( Algebra.compatible(bindingLeft, bindingRight) )
+ Binding bindingLeft = getLeft().nextBinding() ;
+ slot = getNextSlot(bindingLeft) ;
+ if ( slot != null )
{
- accept = false ;
- break ;
+ slot = bindingLeft ;
+ return true ;
}
}
-
- if ( accept )
- return bindingLeft ;
- return null ;
+ getLeft().close() ;
+ return false ;
}
- private boolean commonVariable(Set<Var> varsLeft, Binding bindingRight)
+ @Override
+ protected final Binding moveToNextBinding()
{
- for ( Iterator<Var> iter = bindingRight.vars() ; iter.hasNext() ; )
- {
- Var v = iter.next() ;
- if ( varsLeft.contains(v) )
- return true ;
- }
- return false ;
+ if ( ! hasNextBinding() )
+ return null ;
+ Binding x = slot ;
+ slot = null ;
+ return x ;
}
}
Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java?rev=1352595&r1=1352594&r2=1352595&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java Thu Jun 21 16:17:18 2012
@@ -21,6 +21,7 @@ package com.hp.hpl.jena.sparql.engine.ma
import java.util.ArrayList ;
import java.util.Iterator ;
import java.util.List ;
+import java.util.Set;
import org.openjena.atlas.iterator.Iter ;
import org.openjena.atlas.logging.Log ;
@@ -29,9 +30,11 @@ import com.hp.hpl.jena.graph.Node ;
import com.hp.hpl.jena.query.QueryExecException ;
import com.hp.hpl.jena.sparql.ARQNotImplemented ;
import com.hp.hpl.jena.sparql.algebra.Op ;
+import com.hp.hpl.jena.sparql.algebra.OpVars;
import com.hp.hpl.jena.sparql.algebra.op.* ;
import com.hp.hpl.jena.sparql.core.BasicPattern ;
import com.hp.hpl.jena.sparql.core.Quad ;
+import com.hp.hpl.jena.sparql.core.Var;
import com.hp.hpl.jena.sparql.engine.ExecutionContext ;
import com.hp.hpl.jena.sparql.engine.QueryIterator ;
import com.hp.hpl.jena.sparql.engine.binding.Binding ;
@@ -250,9 +253,16 @@ public class OpExecutor
protected QueryIterator execute(OpMinus opMinus, QueryIterator input)
{
- QueryIterator left = executeOp(opMinus.getLeft(), input) ;
- QueryIterator right = executeOp(opMinus.getRight(), root()) ;
- return new QueryIterMinus(left, right, execCxt) ;
+ Op lhsOp = opMinus.getLeft();
+ Op rhsOp = opMinus.getRight();
+
+ QueryIterator left = executeOp(lhsOp, input) ;
+ QueryIterator right = executeOp(rhsOp, root()) ;
+
+ Set<Var> commonVars = OpVars.patternVars(lhsOp) ;
+ commonVars.retainAll(OpVars.patternVars(rhsOp)) ;
+
+ return new QueryIterMinus(left, right, commonVars, execCxt) ;
}
protected QueryIterator execute(OpUnion opUnion, QueryIterator input)