You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2012/06/21 18:17:19 UTC

svn commit: r1352595 - in /jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql: algebra/index/ engine/iterator/ engine/main/

Author: rvesse
Date: Thu Jun 21 16:17:18 2012
New Revision: 1352595

URL: http://svn.apache.org/viewvc?rev=1352595&view=rev
Log:
Applied Paul Gearon's patch for MINUS evaluation improvements (JENA-266)

Added:
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/HashIndexTable.java
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexFactory.java
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexTable.java
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/LinearIndex.java
Modified:
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterMinus.java
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java

Added: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/HashIndexTable.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/HashIndexTable.java?rev=1352595&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/HashIndexTable.java (added)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/HashIndexTable.java Thu Jun 21 16:17:18 2012
@@ -0,0 +1,213 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hp.hpl.jena.sparql.algebra.index;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.sparql.core.Var;
+import com.hp.hpl.jena.sparql.engine.QueryIterator;
+import com.hp.hpl.jena.sparql.engine.binding.Binding;
+
+public class HashIndexTable implements IndexTable {
+
+	final private Set<Key> table ;
+	private Map<Var,Integer> varColumns ;
+	private boolean missingValue ;
+
+	public HashIndexTable(Set<Var> commonVars, QueryIterator data) throws MissingBindingException
+    {
+    	initColumnMappings(commonVars) ;
+    	if ( commonVars.size() == 0 )
+    	{
+    		table = null ;
+    		return ;
+    	}
+
+    	table = new HashSet<Key>() ;
+    	missingValue = false ;
+
+    	while ( data.hasNext() )
+        {
+            Binding binding = data.nextBinding() ;
+            addBindingToTable(binding) ;
+        }
+    	data.close() ;
+    }
+
+    @Override
+	public boolean containsCompatibleWithSharedDomain(Binding binding)
+    {
+    	// no shared variables means no shared domain, and should be ignored
+    	if ( table == null )
+    		return false ;
+
+    	Key indexKey ;
+		indexKey = convertToKey(binding) ;
+
+		if ( table.contains(indexKey) )
+			return true ;
+		
+		if ( anyUnbound(indexKey) )
+			return exhaustiveSearch(indexKey) ;
+		return false ;
+    }
+
+    private boolean anyUnbound(Key mappedBinding)
+    {
+    	for ( Node n: mappedBinding.getNodes() )
+    	{
+    		if ( n == null )
+    			return true ;
+    	}
+    	return false ;
+    }
+
+    private void initColumnMappings(Set<Var> commonVars)
+    {
+    	varColumns = new HashMap<Var,Integer>() ;
+    	int c = 0 ;
+    	for ( Var var: commonVars )
+    		varColumns.put(var, c++) ;
+    }
+
+    private void addBindingToTable(Binding binding) throws MissingBindingException
+    {
+    	Key key = convertToKey(binding) ;
+		table.add(key) ;
+		if ( missingValue )
+			throw new MissingBindingException(table, varColumns) ;
+    }
+
+    private Key convertToKey(Binding binding)
+    {
+		Node[] indexKey = new Node[varColumns.size()] ;
+
+		for ( Map.Entry<Var,Integer> varCol : varColumns.entrySet() )
+		{
+			Node value = binding.get(varCol.getKey()) ;
+			if ( value == null )
+				missingValue = true ;
+			indexKey[varCol.getValue()] = value ;
+		}
+		return new Key(indexKey) ;
+    }
+
+    private boolean exhaustiveSearch(Key mappedBindingLeft)
+    {
+    	for ( Key mappedBindingRight: table )
+    	{
+    		if ( mappedBindingLeft.compatibleAndSharedDomain(mappedBindingRight) )
+    			return true ;
+    	}
+    	return false ;
+    }
+
+    static class MissingBindingException extends Exception {
+    	private final Set<Key> data ;
+    	private final Map<Var,Integer> varMappings ;
+
+    	public MissingBindingException(Set<Key> data, Map<Var,Integer> varMappings)
+    	{
+    		this.data = data ;
+    		this.varMappings = varMappings ;
+    	}
+
+    	public Set<Key> getData() { return data ; }
+    	public Map<Var,Integer> getMap() { return varMappings ; }
+    }
+    
+    static class Key
+    {
+    	final Node[] nodes;
+
+    	Key(Node[] nodes)
+    	{
+    		this.nodes = nodes ;
+    	}
+
+    	public Node[] getNodes()
+    	{
+    		return nodes;
+    	}
+
+    	@Override
+		public String toString()
+    	{
+    		return Arrays.asList(nodes).toString() ;
+    	}
+
+    	@Override
+		public int hashCode()
+    	{
+    		int result = 0 ;
+    		for ( Node n: nodes )
+    			result ^= (n == null) ? 0 : n.hashCode() ;
+    		return result ;
+    	}
+    	
+    	@Override
+		public boolean equals(Object o)
+    	{
+    		if ( ! (o instanceof Key) )
+    			return false ;
+    		Node[] other = ((Key)o).nodes ;
+
+    		for ( int i = 0 ; i < nodes.length ; i++ )
+    		{
+    			if ( nodes[i] == null)
+    			{
+    				if ( other[i] != null )
+        				return false ;
+    			}
+    			else
+    			{
+	    			if ( ! nodes[i].equals(other[i]) )
+	    				return false ;
+    			}
+    		}
+    		return true ;
+    	}
+
+        public boolean compatibleAndSharedDomain(Key mappedBindingR)
+        {
+        	Node[] nodesRight = mappedBindingR.getNodes() ;
+
+        	boolean sharedDomain = false ;
+        	for ( int c = 0 ; c < nodes.length ; c++ )
+            {
+                Node nLeft  = nodes[c] ; 
+                Node nRight = nodesRight[c] ;
+                
+                if ( nLeft != null && nRight != null )
+            	{
+            		if ( nLeft.equals(nRight) )
+            			return false ;
+            		sharedDomain = true ;
+            	}
+            }
+            return sharedDomain ;
+        }
+    }
+}
+

Added: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexFactory.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexFactory.java?rev=1352595&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexFactory.java (added)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexFactory.java Thu Jun 21 16:17:18 2012
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hp.hpl.jena.sparql.algebra.index;
+
+import java.util.Set;
+
+import com.hp.hpl.jena.sparql.algebra.index.HashIndexTable.MissingBindingException;
+import com.hp.hpl.jena.sparql.core.Var;
+import com.hp.hpl.jena.sparql.engine.QueryIterator;
+
+public class IndexFactory {
+
+	public static IndexTable createIndex(Set<Var> commonVars, QueryIterator data)
+	{
+		try {
+			return new HashIndexTable(commonVars, data) ;
+		} catch (MissingBindingException e) {
+			return new LinearIndex(commonVars, data, e.getData(), e.getMap()) ;
+		}
+	}
+}
+

Added: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexTable.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexTable.java?rev=1352595&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexTable.java (added)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/IndexTable.java Thu Jun 21 16:17:18 2012
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hp.hpl.jena.sparql.algebra.index;
+
+import com.hp.hpl.jena.sparql.engine.binding.Binding;
+
+public interface IndexTable {
+
+	public abstract boolean containsCompatibleWithSharedDomain(Binding binding);
+
+}

Added: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/LinearIndex.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/LinearIndex.java?rev=1352595&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/LinearIndex.java (added)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/index/LinearIndex.java Thu Jun 21 16:17:18 2012
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hp.hpl.jena.sparql.algebra.index;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.sparql.algebra.Algebra;
+import com.hp.hpl.jena.sparql.core.Var;
+import com.hp.hpl.jena.sparql.engine.QueryIterator;
+import com.hp.hpl.jena.sparql.engine.binding.Binding;
+import com.hp.hpl.jena.sparql.engine.binding.BindingHashMap;
+
+public class LinearIndex implements IndexTable {
+
+	final Set<Var> commonVars ;
+	List<Binding> table = new ArrayList<Binding>() ;
+
+	public LinearIndex(Set<Var> commonVars, QueryIterator data)
+	{
+		this.commonVars = commonVars ;
+		while ( data.hasNext() )
+			table.add(data.next()) ;
+		data.close() ;
+	}
+
+	public LinearIndex(Set<Var> commonVars, QueryIterator data, Set<HashIndexTable.Key> loadedData, Map<Var,Integer> mappings)
+	{
+		this.commonVars = commonVars ;
+		for ( HashIndexTable.Key key: loadedData )
+			table.add(toBinding(key, mappings)) ;
+
+		while ( data.hasNext() )
+			table.add(data.next()) ;
+		data.close() ;
+	}
+
+	@Override
+	public boolean containsCompatibleWithSharedDomain(Binding bindingLeft)
+	{
+		if ( commonVars.size() == 0 )
+			return false ;
+
+		for ( Binding bindingRight: table )
+    	{
+			if ( hasCommonVars(bindingLeft, bindingRight)
+					&& Algebra.compatible(bindingLeft, bindingRight) )
+    			return true ;
+    	}
+    	return false ;
+	}
+
+	private boolean hasCommonVars(Binding left, Binding right)
+	{
+		for ( Var v: commonVars )
+		{
+			if ( left.contains(v) && right.contains(v) )
+				return true ;
+		}
+		return false;
+	}
+
+	private static Binding toBinding(HashIndexTable.Key key, Map<Var,Integer> mappings)
+	{
+		Node[] values = key.getNodes() ;
+		BindingHashMap b = new BindingHashMap() ;
+		for (Map.Entry<Var,Integer> mapping: mappings.entrySet())
+		{
+			Node value = values[mapping.getValue()] ;
+			if ( value != null )
+				b.add(mapping.getKey(), value) ;
+		}
+		return b ;
+	}
+}
+

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterMinus.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterMinus.java?rev=1352595&r1=1352594&r2=1352595&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterMinus.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterMinus.java Thu Jun 21 16:17:18 2012
@@ -18,57 +18,72 @@
 
 package com.hp.hpl.jena.sparql.engine.iterator;
 
-import java.util.Iterator ;
+import java.util.Map;
 import java.util.Set ;
 
-import org.openjena.atlas.iterator.Iter ;
-
-import com.hp.hpl.jena.sparql.algebra.Algebra ;
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.sparql.algebra.index.IndexFactory;
+import com.hp.hpl.jena.sparql.algebra.index.IndexTable;
 import com.hp.hpl.jena.sparql.core.Var ;
 import com.hp.hpl.jena.sparql.engine.ExecutionContext ;
 import com.hp.hpl.jena.sparql.engine.QueryIterator ;
 import com.hp.hpl.jena.sparql.engine.binding.Binding ;
 
 /** Minus by materializing the RHS - this is not streamed on the right */
-public class QueryIterMinus extends QueryIter2LoopOnLeft
+public class QueryIterMinus extends QueryIter2
 {
-    public QueryIterMinus(QueryIterator left, QueryIterator right, ExecutionContext qCxt)
+	private IndexTable tableRight;
+	private Map<Var, Integer> varColumns ;
+	private Set<Node[]> rightTable;
+    Binding slot = null ;
+
+	public QueryIterMinus(QueryIterator left, QueryIterator right, Set<Var> commonVars, ExecutionContext qCxt)
     {
         super(left, right, qCxt) ;
+        tableRight = IndexFactory.createIndex(commonVars, right) ;
     }
 
-    @Override
     protected Binding getNextSlot(Binding bindingLeft)
     {
-        boolean accept = true ;
-        Set<Var> varsLeft = Iter.toSet(bindingLeft.vars()) ;
+        if ( tableRight.containsCompatibleWithSharedDomain(bindingLeft) )
+        	return null ;
+        
+        return bindingLeft;
+    }
 
-        for ( Iterator<Binding> iter = tableRight.iterator(null) ; iter.hasNext() ; )
+    @Override
+    protected final void closeSubIterator() { }
+    
+    @Override
+    protected void requestSubCancel() { }
+   
+    @Override
+    protected final boolean hasNextBinding()
+    {
+        if ( slot != null )
+            return true ;
+        
+        while ( getLeft().hasNext() )
         {
-            Binding bindingRight = iter.next() ;
-            
-            if ( ! commonVariable(varsLeft, bindingRight) )
-                continue ;
-            if ( Algebra.compatible(bindingLeft, bindingRight) )
+            Binding bindingLeft = getLeft().nextBinding() ;
+            slot = getNextSlot(bindingLeft) ;
+            if ( slot != null )
             {
-                accept = false ;
-                break ;
+                slot = bindingLeft ; 
+                return true ;
             }
         }
-
-        if ( accept )
-            return bindingLeft ;
-        return null ;
+        getLeft().close() ;
+        return false ;
     }
 
-    private boolean commonVariable(Set<Var> varsLeft, Binding bindingRight)
+    @Override
+    protected final Binding moveToNextBinding()
     {
-        for ( Iterator<Var> iter = bindingRight.vars() ; iter.hasNext() ; )
-        {
-            Var v = iter.next() ;
-            if ( varsLeft.contains(v) )
-                return true ;
-        }
-        return false ;
+        if ( ! hasNextBinding() )
+            return null ;
+        Binding x = slot ;
+        slot = null ;
+        return x ;
     }
 }

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java?rev=1352595&r1=1352594&r2=1352595&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/OpExecutor.java Thu Jun 21 16:17:18 2012
@@ -21,6 +21,7 @@ package com.hp.hpl.jena.sparql.engine.ma
 import java.util.ArrayList ;
 import java.util.Iterator ;
 import java.util.List ;
+import java.util.Set;
 
 import org.openjena.atlas.iterator.Iter ;
 import org.openjena.atlas.logging.Log ;
@@ -29,9 +30,11 @@ import com.hp.hpl.jena.graph.Node ;
 import com.hp.hpl.jena.query.QueryExecException ;
 import com.hp.hpl.jena.sparql.ARQNotImplemented ;
 import com.hp.hpl.jena.sparql.algebra.Op ;
+import com.hp.hpl.jena.sparql.algebra.OpVars;
 import com.hp.hpl.jena.sparql.algebra.op.* ;
 import com.hp.hpl.jena.sparql.core.BasicPattern ;
 import com.hp.hpl.jena.sparql.core.Quad ;
+import com.hp.hpl.jena.sparql.core.Var;
 import com.hp.hpl.jena.sparql.engine.ExecutionContext ;
 import com.hp.hpl.jena.sparql.engine.QueryIterator ;
 import com.hp.hpl.jena.sparql.engine.binding.Binding ;
@@ -250,9 +253,16 @@ public class OpExecutor
     
     protected QueryIterator execute(OpMinus opMinus, QueryIterator input)
     { 
-        QueryIterator left = executeOp(opMinus.getLeft(), input) ;
-        QueryIterator right = executeOp(opMinus.getRight(), root()) ;
-        return new QueryIterMinus(left, right, execCxt) ;
+    	Op lhsOp = opMinus.getLeft();
+    	Op rhsOp = opMinus.getRight();
+    	
+        QueryIterator left = executeOp(lhsOp, input) ;
+        QueryIterator right = executeOp(rhsOp, root()) ;
+
+        Set<Var> commonVars = OpVars.patternVars(lhsOp) ;
+        commonVars.retainAll(OpVars.patternVars(rhsOp)) ;
+
+        return new QueryIterMinus(left, right, commonVars, execCxt) ;
     }
 
     protected QueryIterator execute(OpUnion opUnion, QueryIterator input)