You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by sa...@apache.org on 2011/11/04 18:41:36 UTC
svn commit: r1197687 - in /incubator/jena/Jena2/ARQ/trunk: ./
src/main/java/com/hp/hpl/jena/query/
src/main/java/com/hp/hpl/jena/sparql/engine/iterator/
src/main/java/com/hp/hpl/jena/sparql/modify/
src/main/java/org/openjena/atlas/data/ src/test/java/c...
Author: sallen
Date: Fri Nov 4 17:41:35 2011
New Revision: 1197687
URL: http://svn.apache.org/viewvc?rev=1197687&view=rev
Log:
JENA-119 (Eliminate memory bounds during query execution). Refactoring the multiple configuration symbols to a single symbol, "spillToDiskThreshold".
Added:
incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/ThresholdPolicyFactory.java
Removed:
incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/ThresholdPolicyNever.java
Modified:
incubator/jena/Jena2/ARQ/trunk/.classpath
incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/query/ARQ.java
incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterSort.java
incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java
incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/engine/iterator/TestQueryIterSort.java
incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/modify/TestUpdateGraph.java
Modified: incubator/jena/Jena2/ARQ/trunk/.classpath
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/.classpath?rev=1197687&r1=1197686&r2=1197687&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/.classpath (original)
+++ incubator/jena/Jena2/ARQ/trunk/.classpath Fri Nov 4 17:41:35 2011
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
- <classpathentry kind="src" path="src/main/java"/>
- <classpathentry kind="src" path="src/test/java"/>
+ <classpathentry excluding="**/.svn/" kind="src" path="src/main/java"/>
+ <classpathentry excluding="**/.svn/" kind="src" path="src/test/java"/>
<classpathentry excluding="**/.svn/" kind="src" path="src-examples"/>
<classpathentry kind="var" path="M2_REPO/commons-codec/commons-codec/1.4/commons-codec-1.4.jar" sourcepath="M2_REPO/commons-codec/commons-codec/1.4/commons-codec-1.4-sources.jar"/>
Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/query/ARQ.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/query/ARQ.java?rev=1197687&r1=1197686&r2=1197687&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/query/ARQ.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/query/ARQ.java Fri Nov 4 17:41:35 2011
@@ -210,14 +210,31 @@ public class ARQ
public static final Symbol javaRegex = ARQConstants.allocSymbol("javaRegex") ;
/** Symbol to name the Xerces-J regular expression engine */
public static final Symbol xercesRegex = ARQConstants.allocSymbol("xercesRegex") ;
-
- // Spilling controls.
-
- /** Symbol to set the threshold representing the number of bindings when to use external sorting for queries using ORDER BY */
- public static final Symbol spillOnDiskSortingThreshold = ARQConstants.allocSymbol("spillOnDiskSortingThreshold") ;
-
- /** Symbol to set the threshold representing the number of bindings when to spill on disk when an update is received */
- public static final Symbol spillOnDiskUpdateThreshold = ARQConstants.allocSymbol("spillOnDiskUpdateThreshold") ;
+
+
+ /**
+ * A Long value that specifies the number of bindings (or triples for CONSTRUCT queries) to be stored in memory by sort
+ * operations or hash tables before switching to temporary disk files. The value defaults to -1, which will always
+ * keep the bindings in memory and never write to temporary files. The amount of memory used will vary based on
+ * the size of the bindings. If you are retrieving large literal strings, then you may need to lower the value.
+ * <p/>
+ * Note that for a complex query, several sort or hash operations might be running in parallel; each one will be
+ * allowed to retain as many bindings in memory as this value specifies before it starts putting data in temporary
+ * files. Also, several running sessions could be doing such operations concurrently. Therefore, the total number
+ * of bindings held in memory could be many times this value; it is necessary to keep this fact in mind when
+ * choosing the value.
+ * <p/>
+ * Operations currently affected by this symbol: <br/>
+ * ORDER BY, SPARQL Update, CONSTRUCT (optionally)
+ * <p/>
+ * TODO: Give a reasonable suggested value here. 10,000?
+ * <p/>
+ * @see <a href="https://issues.apache.org/jira/browse/JENA-119">JENA-119</a>
+ */
+ // Some possible additions to the list:
+ // Sort: DISTINCT, merge joins
+ // Hash table: GROUP BY, MINUS, SERVICE, BINDINGS, and hash joins
+ public static final Symbol spillToDiskThreshold = ARQConstants.allocSymbol("spillToDiskThreshold") ;
// Optimizer controls.
Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterSort.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterSort.java?rev=1197687&r1=1197686&r2=1197687&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterSort.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/engine/iterator/QueryIterSort.java Fri Nov 4 17:41:35 2011
@@ -25,13 +25,11 @@ import java.util.List ;
import org.openjena.atlas.data.BagFactory ;
import org.openjena.atlas.data.SortedDataBag ;
import org.openjena.atlas.data.ThresholdPolicy ;
-import org.openjena.atlas.data.ThresholdPolicyCount ;
-import org.openjena.atlas.data.ThresholdPolicyNever ;
+import org.openjena.atlas.data.ThresholdPolicyFactory ;
import org.openjena.atlas.iterator.IteratorDelayedInitialization ;
import org.openjena.atlas.lib.Closeable ;
import org.openjena.riot.SerializationFactoryFinder ;
-import com.hp.hpl.jena.query.ARQ ;
import com.hp.hpl.jena.query.QueryCancelledException ;
import com.hp.hpl.jena.query.SortCondition ;
import com.hp.hpl.jena.sparql.engine.ExecutionContext ;
@@ -48,8 +46,6 @@ import com.hp.hpl.jena.sparql.engine.bin
public class QueryIterSort extends QueryIterPlainWrapper
{
- private static final long defaultSpillOnDiskSortingThreshold = -1 ; // off by default
-
private final QueryIterator embeddedIterator; // Keep a record of the underlying source for .cancel.
final SortedDataBag<Binding> db;
@@ -63,8 +59,7 @@ public class QueryIterSort extends Query
super(null, context) ;
this.embeddedIterator = qIter ;
- long threshold = (Long)context.getContext().get(ARQ.spillOnDiskSortingThreshold, defaultSpillOnDiskSortingThreshold) ;
- ThresholdPolicy<Binding> policy = (threshold >= 0) ? new ThresholdPolicyCount<Binding>(threshold) : new ThresholdPolicyNever<Binding>() ;
+ ThresholdPolicy<Binding> policy = ThresholdPolicyFactory.policyFromContext(context.getContext());
this.db = BagFactory.newSortedBag(policy, SerializationFactoryFinder.bindingSerializationFactory(), comparator);
this.setIterator(new SortedBindingIterator(qIter));
Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java?rev=1197687&r1=1197686&r2=1197687&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java Fri Nov 4 17:41:35 2011
@@ -26,15 +26,13 @@ import java.util.List ;
import org.openjena.atlas.data.BagFactory ;
import org.openjena.atlas.data.DataBag ;
import org.openjena.atlas.data.ThresholdPolicy ;
-import org.openjena.atlas.data.ThresholdPolicyCount ;
-import org.openjena.atlas.data.ThresholdPolicyNever ;
+import org.openjena.atlas.data.ThresholdPolicyFactory ;
import org.openjena.atlas.iterator.Iter ;
import org.openjena.riot.SerializationFactoryFinder ;
import com.hp.hpl.jena.graph.Graph ;
import com.hp.hpl.jena.graph.Node ;
import com.hp.hpl.jena.graph.Triple ;
-import com.hp.hpl.jena.query.ARQ ;
import com.hp.hpl.jena.query.Query ;
import com.hp.hpl.jena.query.QueryExecutionFactory ;
import com.hp.hpl.jena.rdf.model.Model ;
@@ -75,18 +73,14 @@ import com.hp.hpl.jena.util.FileManager
/** Implementation of general purpose update request execution */
public class UpdateEngineWorker implements UpdateVisitor
{
- static final long defaultSpillOnDiskUpdateThreshold = -1 ;
-
protected final GraphStore graphStore ;
protected final Binding initialBinding ;
protected final boolean alwaysSilent = true ;
- private final long spillThreshold ;
public UpdateEngineWorker(GraphStore graphStore, Binding initialBinding)
{
this.graphStore = graphStore ;
this.initialBinding = initialBinding ;
- this.spillThreshold = (Long)graphStore.getContext().get(ARQ.spillOnDiskUpdateThreshold, defaultSpillOnDiskUpdateThreshold) ;
}
@Override
@@ -233,8 +227,7 @@ public class UpdateEngineWorker implemen
Graph gDest = graph(gStore, dest) ;
// Avoids concurrency problems by reading fully before writing
- long threshold = (Long)gStore.getContext().get(ARQ.spillOnDiskUpdateThreshold, defaultSpillOnDiskUpdateThreshold) ;
- ThresholdPolicy<Triple> policy = (threshold >= 0) ? new ThresholdPolicyCount<Triple>(threshold) : new ThresholdPolicyNever<Triple>();
+ ThresholdPolicy<Triple> policy = ThresholdPolicyFactory.policyFromContext(gStore.getContext());
DataBag<Triple> db = BagFactory.newDefaultBag(policy, SerializationFactoryFinder.tripleSerializationFactory()) ;
try
{
@@ -297,7 +290,7 @@ public class UpdateEngineWorker implemen
// Decided to serialize the bindings, but could also have decided to
// serialize the quads after applying the template instead.
- ThresholdPolicy<Binding> policy = (spillThreshold >= 0) ? new ThresholdPolicyCount<Binding>(spillThreshold) : new ThresholdPolicyNever<Binding>();
+ ThresholdPolicy<Binding> policy = ThresholdPolicyFactory.policyFromContext(graphStore.getContext());
DataBag<Binding> db = BagFactory.newDefaultBag(policy, SerializationFactoryFinder.bindingSerializationFactory()) ;
try
{
@@ -334,7 +327,7 @@ public class UpdateEngineWorker implemen
if ( dsg == null )
dsg = graphStore ;
- ThresholdPolicy<Binding> policy = (spillThreshold >= 0) ? new ThresholdPolicyCount<Binding>(spillThreshold) : new ThresholdPolicyNever<Binding>();
+ ThresholdPolicy<Binding> policy = ThresholdPolicyFactory.policyFromContext(graphStore.getContext());
DataBag<Binding> db = BagFactory.newDefaultBag(policy, SerializationFactoryFinder.bindingSerializationFactory()) ;
try
{
Added: incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/ThresholdPolicyFactory.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/ThresholdPolicyFactory.java?rev=1197687&view=auto
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/ThresholdPolicyFactory.java (added)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/ThresholdPolicyFactory.java Fri Nov 4 17:41:35 2011
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.openjena.atlas.data ;
+
+import com.hp.hpl.jena.query.ARQ ;
+import com.hp.hpl.jena.sparql.util.Context ;
+
+public class ThresholdPolicyFactory
+{
+ private static final long defaultThreshold = -1 ; // Negative default: policyFromContext() then selects the never() policy
+
+ private static final ThresholdPolicy<?> NEVER = new ThresholdPolicy<Object>()
+ {
+ @Override
+ public void increment(Object item)
+ {
+ // Intentionally empty: the item is ignored and nothing is counted
+ }
+
+ @Override
+ public boolean isThresholdExceeded()
+ {
+ return false ;
+ }
+
+ @Override
+ public void reset()
+ {
+ // Intentionally empty: this policy holds no state to reset
+ }
+ } ;
+
+ /**
+ * Returns the single shared policy instance whose isThresholdExceeded() always returns false.
+ */
+ public static final <E> ThresholdPolicy<E> never()
+ {
+ @SuppressWarnings("unchecked")
+ ThresholdPolicy<E> policy = (ThresholdPolicy<E>) NEVER ;
+ return policy ;
+ }
+
+ /**
+ * A threshold policy based on the number of tuples added; returns a new {@link ThresholdPolicyCount} built with the given threshold.
+ */
+ public static <E> ThresholdPolicy<E> count(long threshold)
+ {
+ return new ThresholdPolicyCount<E>(threshold) ;
+ }
+
+ /**
+ * Reads the {@link com.hp.hpl.jena.query.ARQ#spillToDiskThreshold} symbol from the given Context and returns {@link #count(long)} for non-negative values.
+ * If the symbol is not set (default -1) or is negative, the {@link #never()} policy is returned. NOTE(review): the value is cast to Long, so a non-Long setting (e.g. Integer) presumably fails with ClassCastException - confirm.
+ */
+ public static <E> ThresholdPolicy<E> policyFromContext(Context context)
+ {
+ long threshold = (Long) context.get(ARQ.spillToDiskThreshold, defaultThreshold) ;
+ if ( threshold >= 0 )
+ {
+ return count(threshold);
+ }
+ else
+ {
+ return never() ;
+ }
+ }
+}
Modified: incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/engine/iterator/TestQueryIterSort.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/engine/iterator/TestQueryIterSort.java?rev=1197687&r1=1197686&r2=1197687&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/engine/iterator/TestQueryIterSort.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/engine/iterator/TestQueryIterSort.java Fri Nov 4 17:41:35 2011
@@ -117,7 +117,7 @@ public class TestQueryIterSort {
public void call() { /* do nothing */ } });
assertEquals(0, iterator.getReturnedElementCount());
Context context = new Context() ;
- context.set(ARQ.spillOnDiskSortingThreshold, 10L) ;
+ context.set(ARQ.spillToDiskThreshold, 10L) ;
ExecutionContext executionContext = new ExecutionContext(context, (Graph)null, (DatasetGraph)null, (OpExecutorFactory)null) ;
QueryIterSort qIter = new QueryIterSort(iterator, comparator, executionContext) ;
try
@@ -143,7 +143,7 @@ public class TestQueryIterSort {
public void call() { /* do nothing */ } });
assertEquals(0, iterator.getReturnedElementCount());
Context context = new Context() ;
- context.set(ARQ.spillOnDiskSortingThreshold, 10L) ;
+ context.set(ARQ.spillToDiskThreshold, 10L) ;
ExecutionContext executionContext = new ExecutionContext(context, (Graph)null, (DatasetGraph)null, (OpExecutorFactory)null) ;
QueryIterSort qIter = new QueryIterSort(iterator, comparator, executionContext) ;
@@ -167,7 +167,7 @@ public class TestQueryIterSort {
assertEquals(0, iterator.getReturnedElementCount());
Context context = new Context() ;
- context.set(ARQ.spillOnDiskSortingThreshold, 10L) ;
+ context.set(ARQ.spillToDiskThreshold, 10L) ;
ExecutionContext executionContext = new ExecutionContext(context, (Graph)null, (DatasetGraph)null, (OpExecutorFactory)null) ;
QueryIterSort qIter = new QueryIterSort(iterator, comparator, executionContext) ;
try
@@ -193,7 +193,7 @@ public class TestQueryIterSort {
{
assertEquals(0, iterator.getReturnedElementCount());
Context context = new Context() ;
- context.set(ARQ.spillOnDiskSortingThreshold, 10L) ;
+ context.set(ARQ.spillToDiskThreshold, 10L) ;
ExecutionContext executionContext = new ExecutionContext(context, (Graph)null, (DatasetGraph)null, (OpExecutorFactory)null) ;
QueryIterSort qIter = new QueryIterSort(iterator, comparator, executionContext) ;
try
@@ -227,7 +227,7 @@ public class TestQueryIterSort {
public void call() { /* do nothing */ } });
assertEquals(0, iterator.getReturnedElementCount());
Context context = new Context() ;
- context.set(ARQ.spillOnDiskSortingThreshold, 10L) ;
+ context.set(ARQ.spillToDiskThreshold, 10L) ;
ExecutionContext executionContext = new ExecutionContext(context, (Graph)null, (DatasetGraph)null, (OpExecutorFactory)null) ;
QueryIterSort qIter = new QueryIterSort(iterator, comparator, executionContext) ;
try
Modified: incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/modify/TestUpdateGraph.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/modify/TestUpdateGraph.java?rev=1197687&r1=1197686&r2=1197687&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/modify/TestUpdateGraph.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/modify/TestUpdateGraph.java Fri Nov 4 17:41:35 2011
@@ -235,7 +235,7 @@ public abstract class TestUpdateGraph ex
GraphStore gStore = getEmptyGraphStore() ;
// Set the threshold to 0 in order to force spill to disk
- gStore.getContext().set(ARQ.spillOnDiskUpdateThreshold, 0L) ;
+ gStore.getContext().set(ARQ.spillToDiskThreshold, 0L) ;
defaultGraphData(gStore, data(t)) ;
namedGraphData(gStore, graphIRI, data(t));
@@ -262,7 +262,7 @@ public abstract class TestUpdateGraph ex
GraphStore gStore = getEmptyGraphStore() ;
// Set the threshold to 0 in order to force spill to disk
- gStore.getContext().set(ARQ.spillOnDiskUpdateThreshold, 0L) ;
+ gStore.getContext().set(ARQ.spillToDiskThreshold, 0L) ;
defaultGraphData(gStore, data(triple1, triple2, t)) ;
namedGraphData(gStore, graphIRI, data(t2));