You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/06/02 12:41:09 UTC
[6/6] jena git commit: Set the threshold correctly.
Set the threshold correctly.
Use a data bag, not a data net, as we don't use the early notification
of uniqueness anymore. See JENA-949.
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/bcba645a
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/bcba645a
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/bcba645a
Branch: refs/heads/master
Commit: bcba645a561866545705c0e73beae143a15fb3d9
Parents: fecd978
Author: Andy Seaborne <an...@apache.org>
Authored: Tue Jun 2 11:39:05 2015 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Tue Jun 2 11:39:05 2015 +0100
----------------------------------------------------------------------
.../engine/iterator/QueryIterDistinct.java | 45 ++++++++++----------
.../iterator/AbstractTestDistinctReduced.java | 1 -
2 files changed, 23 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/bcba645a/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterDistinct.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterDistinct.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterDistinct.java
index a88cb39..d373ce6 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterDistinct.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterDistinct.java
@@ -20,13 +20,12 @@ package org.apache.jena.sparql.engine.iterator ;
import java.util.* ;
-import org.apache.jena.atlas.data.BagFactory ;
-import org.apache.jena.atlas.data.DistinctDataNet ;
-import org.apache.jena.atlas.data.ThresholdPolicy ;
-import org.apache.jena.atlas.data.ThresholdPolicyFactory ;
+import org.apache.jena.atlas.data.* ;
import org.apache.jena.atlas.lib.InternalErrorException ;
+import org.apache.jena.query.ARQ ;
import org.apache.jena.query.SortCondition ;
import org.apache.jena.riot.system.SerializationFactoryFinder ;
+import org.apache.jena.sparql.ARQException ;
import org.apache.jena.sparql.engine.ExecutionContext ;
import org.apache.jena.sparql.engine.QueryIterator ;
import org.apache.jena.sparql.engine.binding.Binding ;
@@ -39,23 +38,23 @@ import org.apache.jena.sparql.engine.binding.BindingProjectNamed ;
* {@link DistinctDataNet}, then yield
* not return any results until the input iterator has been exhausted.
*
- * @see DistinctDataNet
+ * @see DistinctDataBag
*/
public class QueryIterDistinct extends QueryIter1
{
- private int Threshold1 = 3 ;
- private DistinctDataNet<Binding> db = null ;
+ private long memThreshold = Long.MAX_VALUE ; // Default "off" value.
+ private DistinctDataBag<Binding> db = null ;
private Iterator<Binding> iterator = null ;
private Set<Binding> seen = new HashSet<>() ;
private Binding slot = null ;
- public QueryIterDistinct(QueryIterator qIter, ExecutionContext context) {
- super(qIter, context) ;
- }
-
- public QueryIterDistinct(QueryIterator qIter, ExecutionContext context, int threshold1) {
- super(qIter, context) ;
- this.Threshold1 = threshold1 ;
+ public QueryIterDistinct(QueryIterator qIter, ExecutionContext execCxt) {
+ super(qIter, execCxt) ;
+ if ( execCxt != null ) {
+ memThreshold = execCxt.getContext().getLong(ARQ.spillToDiskThreshold, memThreshold) ;
+ if ( memThreshold < 0 )
+ throw new ARQException("BAd spillToDiskThreshold: "+memThreshold) ;
+ }
}
@Override
@@ -67,7 +66,7 @@ public class QueryIterDistinct extends QueryIter1
return iterator.hasNext() ;
// At this point, we are currently in the initial pre-threshold mode.
- if ( seen.size() >= Threshold1 ) {
+ if ( seen.size() < memThreshold ) {
Binding b = getInputNextUnseen() ;
if ( b == null )
return false ;
@@ -76,18 +75,19 @@ public class QueryIterDistinct extends QueryIter1
return true ;
}
- // Hit the threashold.
+ // Hit the threshold.
loadDataBag() ;
- // Switch to iterating from the databad.
+ // Switch to iterating from the data bag.
iterator = db.iterator() ;
// Leave slot null.
return iterator.hasNext() ;
}
+ /** Load the data bag with the remaining input, filtering out bindings already seen in memory. */
private void loadDataBag() {
ThresholdPolicy<Binding> policy = ThresholdPolicyFactory.policyFromContext(super.getExecContext().getContext()) ;
Comparator<Binding> comparator = new BindingComparator(new ArrayList<SortCondition>(), super.getExecContext()) ;
- this.db = BagFactory.newDistinctNet(policy, SerializationFactoryFinder.bindingSerializationFactory(), comparator) ;
+ this.db = BagFactory.newDistinctBag(policy, SerializationFactoryFinder.bindingSerializationFactory(), comparator) ;
for(;;) {
Binding b = getInputNextUnseen() ;
if ( b == null )
@@ -96,9 +96,10 @@ public class QueryIterDistinct extends QueryIter1
}
}
- // Return the next binding from the input filtered by seen.
- // This does not update seen.
- // Returns null on end of input.
+ /** Return the next binding from the input filtered by seen.
+ * This does not update seen.
+ * Returns null on end of input.
+ */
private Binding getInputNextUnseen() {
while( getInput().hasNext() ) {
Binding b = getInputNext() ;
@@ -109,7 +110,7 @@ public class QueryIterDistinct extends QueryIter1
return null ;
}
- // Return the next wrapped binding from the input.
+ /** Return the binding from the input, hiding any variables to be ignored. */
private Binding getInputNext() {
Binding b = getInput().next() ;
// Hide unnamed and internal variables.
http://git-wip-us.apache.org/repos/asf/jena/blob/bcba645a/jena-arq/src/test/java/org/apache/jena/sparql/engine/iterator/AbstractTestDistinctReduced.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/engine/iterator/AbstractTestDistinctReduced.java b/jena-arq/src/test/java/org/apache/jena/sparql/engine/iterator/AbstractTestDistinctReduced.java
index d9b5ec3..ac37b63 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/engine/iterator/AbstractTestDistinctReduced.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/engine/iterator/AbstractTestDistinctReduced.java
@@ -77,7 +77,6 @@ public abstract class AbstractTestDistinctReduced extends BaseTest {
distinct(data, results) ;
}
-
private void distinct(List<String> data, List<String> results) {
// Distinct Iterators are not required to preserve order.
List<Binding> input = build(data) ;