You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by sa...@apache.org on 2011/11/04 18:41:36 UTC

svn commit: r1197687 - in /incubator/jena/Jena2/ARQ/trunk: ./ src/main/java/com/hp/hpl/jena/query/ src/main/java/com/hp/hpl/jena/sparql/engine/iterator/ src/main/java/com/hp/hpl/jena/sparql/modify/ src/main/java/org/openjena/atlas/data/ src/test/java/c...

Author: sallen
Date: Fri Nov  4 17:41:35 2011
New Revision: 1197687

URL: http://svn.apache.org/viewvc?rev=1197687&view=rev
Log:
JENA-119 (Eliminate memory bounds during query execution).  Refactoring the multiple configuration symbols to a single symbol, "spillToDiskThreshold".

Added:
    incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/ThresholdPolicyFactory.java
Removed:
    incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/ThresholdPolicyNever.java
Modified:
    incubator/jena/Jena2/ARQ/trunk/.classpath
    incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/query/ARQ.java
    incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterSort.java
    incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java
    incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/engine/iterator/TestQueryIterSort.java
    incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/modify/TestUpdateGraph.java

Modified: incubator/jena/Jena2/ARQ/trunk/.classpath
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/.classpath?rev=1197687&r1=1197686&r2=1197687&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/.classpath (original)
+++ incubator/jena/Jena2/ARQ/trunk/.classpath Fri Nov  4 17:41:35 2011
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <classpath>
-  <classpathentry kind="src" path="src/main/java"/>
-  <classpathentry kind="src" path="src/test/java"/>
+  <classpathentry excluding="**/.svn/" kind="src" path="src/main/java"/>
+  <classpathentry excluding="**/.svn/" kind="src" path="src/test/java"/>
   <classpathentry excluding="**/.svn/" kind="src" path="src-examples"/>
 
   <classpathentry kind="var" path="M2_REPO/commons-codec/commons-codec/1.4/commons-codec-1.4.jar" sourcepath="M2_REPO/commons-codec/commons-codec/1.4/commons-codec-1.4-sources.jar"/>

Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/query/ARQ.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/query/ARQ.java?rev=1197687&r1=1197686&r2=1197687&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/query/ARQ.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/query/ARQ.java Fri Nov  4 17:41:35 2011
@@ -210,14 +210,31 @@ public class ARQ
     public static final Symbol javaRegex =  ARQConstants.allocSymbol("javaRegex") ;
     /** Symbol to name the Xerces-J regular expression engine */ 
     public static final Symbol xercesRegex =  ARQConstants.allocSymbol("xercesRegex") ;
-
-    // Spilling controls.
-
-    /** Symbol to set the threshold representing the number of bindings when to use external sorting for queries using ORDER BY */ 
-    public static final Symbol spillOnDiskSortingThreshold = ARQConstants.allocSymbol("spillOnDiskSortingThreshold") ;
-
-    /** Symbol to set the threshold representing the number of bindings when to spill on disk when an update is received */ 
-    public static final Symbol spillOnDiskUpdateThreshold = ARQConstants.allocSymbol("spillOnDiskUpdateThreshold") ;
+
+    
+    /**
+     * A Long value that specifies the number of bindings (or triples for CONSTRUCT queries) to be stored in memory by sort
+     * operations or hash tables before switching to temporary disk files.  The value defaults to -1, which will always
+     * keep the bindings in memory and never write to temporary files.  The amount of memory used will vary based on
+     * the size of the bindings.  If you are retrieving large literal strings, then you may need to lower the value. 
+     * <p/>
+     * Note that for a complex query, several sort or hash operations might be running in parallel; each one will be
+     * allowed to retain as many bindings in memory as this value specifies before it starts putting data in temporary
+     * files.  Also, several running sessions could be doing such operations concurrently.  Therefore, the total number
+     * of bindings held in memory could be many times this value; it is necessary to keep this fact in mind when
+     * choosing the value.
+     * <p/>
+     * Operations currently affected by this symbol: <br/>
+     * ORDER BY, SPARQL Update, CONSTRUCT (optionally)
+     * <p/>
+     * TODO: Give a reasonable suggested value here.  10,000?
+     * <p/>
+     * @see <a href="https://issues.apache.org/jira/browse/JENA-119">JENA-119</a>
+     */
+    // Some possible additions to the list:
+    // Sort: DISTINCT, merge joins
+    // Hash table: GROUP BY, MINUS, SERVICE, BINDINGS, and hash joins
+    public static final Symbol spillToDiskThreshold = ARQConstants.allocSymbol("spillToDiskThreshold") ;
     
     // Optimizer controls.
     

Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterSort.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterSort.java?rev=1197687&r1=1197686&r2=1197687&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterSort.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterSort.java Fri Nov  4 17:41:35 2011
@@ -25,13 +25,11 @@ import java.util.List ;
 import org.openjena.atlas.data.BagFactory ;
 import org.openjena.atlas.data.SortedDataBag ;
 import org.openjena.atlas.data.ThresholdPolicy ;
-import org.openjena.atlas.data.ThresholdPolicyCount ;
-import org.openjena.atlas.data.ThresholdPolicyNever ;
+import org.openjena.atlas.data.ThresholdPolicyFactory ;
 import org.openjena.atlas.iterator.IteratorDelayedInitialization ;
 import org.openjena.atlas.lib.Closeable ;
 import org.openjena.riot.SerializationFactoryFinder ;
 
-import com.hp.hpl.jena.query.ARQ ;
 import com.hp.hpl.jena.query.QueryCancelledException ;
 import com.hp.hpl.jena.query.SortCondition ;
 import com.hp.hpl.jena.sparql.engine.ExecutionContext ;
@@ -48,8 +46,6 @@ import com.hp.hpl.jena.sparql.engine.bin
 
 public class QueryIterSort extends QueryIterPlainWrapper
 {
-    private static final long defaultSpillOnDiskSortingThreshold = -1 ; // off by default
-    
 	private final QueryIterator embeddedIterator;      // Keep a record of the underlying source for .cancel.
 	final SortedDataBag<Binding> db;
 	
@@ -63,8 +59,7 @@ public class QueryIterSort extends Query
         super(null, context) ;
         this.embeddedIterator = qIter ;
         
-        long threshold = (Long)context.getContext().get(ARQ.spillOnDiskSortingThreshold, defaultSpillOnDiskSortingThreshold) ;
-        ThresholdPolicy<Binding> policy = (threshold >= 0) ? new ThresholdPolicyCount<Binding>(threshold) : new ThresholdPolicyNever<Binding>() ;
+        ThresholdPolicy<Binding> policy = ThresholdPolicyFactory.policyFromContext(context.getContext());
         this.db = BagFactory.newSortedBag(policy, SerializationFactoryFinder.bindingSerializationFactory(), comparator);
         
         this.setIterator(new SortedBindingIterator(qIter));

Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java?rev=1197687&r1=1197686&r2=1197687&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java Fri Nov  4 17:41:35 2011
@@ -26,15 +26,13 @@ import java.util.List ;
 import org.openjena.atlas.data.BagFactory ;
 import org.openjena.atlas.data.DataBag ;
 import org.openjena.atlas.data.ThresholdPolicy ;
-import org.openjena.atlas.data.ThresholdPolicyCount ;
-import org.openjena.atlas.data.ThresholdPolicyNever ;
+import org.openjena.atlas.data.ThresholdPolicyFactory ;
 import org.openjena.atlas.iterator.Iter ;
 import org.openjena.riot.SerializationFactoryFinder ;
 
 import com.hp.hpl.jena.graph.Graph ;
 import com.hp.hpl.jena.graph.Node ;
 import com.hp.hpl.jena.graph.Triple ;
-import com.hp.hpl.jena.query.ARQ ;
 import com.hp.hpl.jena.query.Query ;
 import com.hp.hpl.jena.query.QueryExecutionFactory ;
 import com.hp.hpl.jena.rdf.model.Model ;
@@ -75,18 +73,14 @@ import com.hp.hpl.jena.util.FileManager 
 /** Implementation of general purpose update request execution */ 
 public class UpdateEngineWorker implements UpdateVisitor
 {
-    static final long defaultSpillOnDiskUpdateThreshold = -1 ;
-
     protected final GraphStore graphStore ;
     protected final Binding initialBinding ;
     protected final boolean alwaysSilent = true ;
-    private final long spillThreshold ;
 
     public UpdateEngineWorker(GraphStore graphStore, Binding initialBinding)
     {
         this.graphStore = graphStore ;
         this.initialBinding = initialBinding ;
-        this.spillThreshold = (Long)graphStore.getContext().get(ARQ.spillOnDiskUpdateThreshold, defaultSpillOnDiskUpdateThreshold) ;
     }
 
     @Override
@@ -233,8 +227,7 @@ public class UpdateEngineWorker implemen
         Graph gDest = graph(gStore, dest) ;
         
         // Avoids concurrency problems by reading fully before writing
-        long threshold = (Long)gStore.getContext().get(ARQ.spillOnDiskUpdateThreshold, defaultSpillOnDiskUpdateThreshold) ;
-        ThresholdPolicy<Triple> policy = (threshold >= 0) ? new ThresholdPolicyCount<Triple>(threshold) : new ThresholdPolicyNever<Triple>();
+        ThresholdPolicy<Triple> policy = ThresholdPolicyFactory.policyFromContext(gStore.getContext());
         DataBag<Triple> db = BagFactory.newDefaultBag(policy, SerializationFactoryFinder.tripleSerializationFactory()) ;
         try
         {
@@ -297,7 +290,7 @@ public class UpdateEngineWorker implemen
         // Decided to serialize the bindings, but could also have decided to
         // serialize the quads after applying the template instead.
         
-        ThresholdPolicy<Binding> policy = (spillThreshold >= 0) ? new ThresholdPolicyCount<Binding>(spillThreshold) : new ThresholdPolicyNever<Binding>();
+        ThresholdPolicy<Binding> policy = ThresholdPolicyFactory.policyFromContext(graphStore.getContext());
         DataBag<Binding> db = BagFactory.newDefaultBag(policy, SerializationFactoryFinder.bindingSerializationFactory()) ;
         try
         {
@@ -334,7 +327,7 @@ public class UpdateEngineWorker implemen
         if ( dsg == null )
             dsg = graphStore ;
         
-        ThresholdPolicy<Binding> policy = (spillThreshold >= 0) ? new ThresholdPolicyCount<Binding>(spillThreshold) : new ThresholdPolicyNever<Binding>();
+        ThresholdPolicy<Binding> policy = ThresholdPolicyFactory.policyFromContext(graphStore.getContext());
         DataBag<Binding> db = BagFactory.newDefaultBag(policy, SerializationFactoryFinder.bindingSerializationFactory()) ;
         try
         {

Added: incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/ThresholdPolicyFactory.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/ThresholdPolicyFactory.java?rev=1197687&view=auto
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/ThresholdPolicyFactory.java (added)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/ThresholdPolicyFactory.java Fri Nov  4 17:41:35 2011
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.openjena.atlas.data ;
+
+import com.hp.hpl.jena.query.ARQ ;
+import com.hp.hpl.jena.sparql.util.Context ;
+
+public class ThresholdPolicyFactory
+{
+    private static final long defaultThreshold = -1 ; // Use the never() policy by default
+
+    private static final ThresholdPolicy<?> NEVER = new ThresholdPolicy<Object>()
+    {
+        @Override
+        public void increment(Object item)
+        {
+            // Do nothing
+        }
+
+        @Override
+        public boolean isThresholdExceeded()
+        {
+            return false ;
+        }
+
+        @Override
+        public void reset()
+        {
+            // Do nothing
+        }
+    } ;
+
+    /**
+     * A threshold policy that is never exceeded.
+     */
+    public static final <E> ThresholdPolicy<E> never()
+    {
+        @SuppressWarnings("unchecked")
+        ThresholdPolicy<E> policy = (ThresholdPolicy<E>) NEVER ;
+        return policy ;
+    }
+    
+    /**
+     * A threshold policy based on the number of tuples added.
+     */
+    public static <E> ThresholdPolicy<E> count(long threshold)
+    {
+        return new ThresholdPolicyCount<E>(threshold) ;
+    }
+
+    /**
+     * A threshold policy based on the {@link com.hp.hpl.jena.query.ARQ#spillToDiskThreshold} symbol in the given Context.
+     * If the symbol is not set, then the {@link #never()} policy is used by default.
+     */
+    public static <E> ThresholdPolicy<E> policyFromContext(Context context)
+    {
+        long threshold = (Long) context.get(ARQ.spillToDiskThreshold, defaultThreshold) ;
+        if ( threshold >= 0 )
+        {
+            return count(threshold);
+        }
+        else
+        {
+            return never() ;
+        }
+    }
+}

Modified: incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/engine/iterator/TestQueryIterSort.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/engine/iterator/TestQueryIterSort.java?rev=1197687&r1=1197686&r2=1197687&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/engine/iterator/TestQueryIterSort.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/engine/iterator/TestQueryIterSort.java Fri Nov  4 17:41:35 2011
@@ -117,7 +117,7 @@ public class TestQueryIterSort {
         public void call() { /* do nothing */ } });
         assertEquals(0, iterator.getReturnedElementCount());
         Context context = new Context() ;
-        context.set(ARQ.spillOnDiskSortingThreshold, 10L) ;
+        context.set(ARQ.spillToDiskThreshold, 10L) ;
         ExecutionContext executionContext = new ExecutionContext(context, (Graph)null, (DatasetGraph)null, (OpExecutorFactory)null) ;
         QueryIterSort qIter = new QueryIterSort(iterator, comparator, executionContext) ;
         try
@@ -143,7 +143,7 @@ public class TestQueryIterSort {
         public void call() { /* do nothing */ } });
         assertEquals(0, iterator.getReturnedElementCount());
         Context context = new Context() ;
-        context.set(ARQ.spillOnDiskSortingThreshold, 10L) ;
+        context.set(ARQ.spillToDiskThreshold, 10L) ;
         ExecutionContext executionContext = new ExecutionContext(context, (Graph)null, (DatasetGraph)null, (OpExecutorFactory)null) ;
         QueryIterSort qIter = new QueryIterSort(iterator, comparator, executionContext) ;
         
@@ -167,7 +167,7 @@ public class TestQueryIterSort {
 
         assertEquals(0, iterator.getReturnedElementCount());
         Context context = new Context() ;
-        context.set(ARQ.spillOnDiskSortingThreshold, 10L) ;
+        context.set(ARQ.spillToDiskThreshold, 10L) ;
         ExecutionContext executionContext = new ExecutionContext(context, (Graph)null, (DatasetGraph)null, (OpExecutorFactory)null) ;
         QueryIterSort qIter = new QueryIterSort(iterator, comparator, executionContext) ;
         try 
@@ -193,7 +193,7 @@ public class TestQueryIterSort {
     {
         assertEquals(0, iterator.getReturnedElementCount());
         Context context = new Context() ;
-        context.set(ARQ.spillOnDiskSortingThreshold, 10L) ;
+        context.set(ARQ.spillToDiskThreshold, 10L) ;
         ExecutionContext executionContext = new ExecutionContext(context, (Graph)null, (DatasetGraph)null, (OpExecutorFactory)null) ;
         QueryIterSort qIter = new QueryIterSort(iterator, comparator, executionContext) ;
         try
@@ -227,7 +227,7 @@ public class TestQueryIterSort {
         public void call() { /* do nothing */ } });
         assertEquals(0, iterator.getReturnedElementCount());
         Context context = new Context() ;
-        context.set(ARQ.spillOnDiskSortingThreshold, 10L) ;
+        context.set(ARQ.spillToDiskThreshold, 10L) ;
         ExecutionContext executionContext = new ExecutionContext(context, (Graph)null, (DatasetGraph)null, (OpExecutorFactory)null) ;
         QueryIterSort qIter = new QueryIterSort(iterator, comparator, executionContext) ;
         try 

Modified: incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/modify/TestUpdateGraph.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/modify/TestUpdateGraph.java?rev=1197687&r1=1197686&r2=1197687&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/modify/TestUpdateGraph.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/modify/TestUpdateGraph.java Fri Nov  4 17:41:35 2011
@@ -235,7 +235,7 @@ public abstract class TestUpdateGraph ex
         
         GraphStore gStore = getEmptyGraphStore() ;
         // Set the threshold to in order to force spill to disk
-        gStore.getContext().set(ARQ.spillOnDiskUpdateThreshold, 0L) ;
+        gStore.getContext().set(ARQ.spillToDiskThreshold, 0L) ;
         
         defaultGraphData(gStore, data(t)) ;
         namedGraphData(gStore, graphIRI, data(t));
@@ -262,7 +262,7 @@ public abstract class TestUpdateGraph ex
         
         GraphStore gStore = getEmptyGraphStore() ;
         // Set the threshold to in order to force spill to disk
-        gStore.getContext().set(ARQ.spillOnDiskUpdateThreshold, 0L) ;
+        gStore.getContext().set(ARQ.spillToDiskThreshold, 0L) ;
         
         defaultGraphData(gStore, data(triple1, triple2, t)) ;
         namedGraphData(gStore, graphIRI, data(t2));