You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by ca...@apache.org on 2011/09/23 23:50:07 UTC
svn commit: r1175044 - in /incubator/jena/Scratch/PC/tdbloader2/trunk: TODO
src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java
src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java
Author: castagna
Date: Fri Sep 23 21:50:07 2011
New Revision: 1175044
URL: http://svn.apache.org/viewvc?rev=1175044&view=rev
Log:
JENA-117
Added:
incubator/jena/Scratch/PC/tdbloader2/trunk/TODO
Modified:
incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java
incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java
Added: incubator/jena/Scratch/PC/tdbloader2/trunk/TODO
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/TODO?rev=1175044&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/TODO (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/TODO Fri Sep 23 21:50:07 2011
@@ -0,0 +1,10 @@
+TODO
+----
+
+ - Support N3, TURTLE, RDF/XML, etc... not only N-Triples | N-Quads.
+
+ - A better SpillSortIterator when there are many files. (Also, make sure
+ to avoid many open files...)
+
+ - Use ThresholdPolicyMemory instead of ThresholdPolicyCount (this needs
+ byte-size estimates).
Modified: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java?rev=1175044&r1=1175043&r2=1175044&view=diff
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java (original)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java Fri Sep 23 21:50:07 2011
@@ -32,8 +32,10 @@ import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.PriorityQueue;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.RejectedExecutionHandler;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
import org.openjena.atlas.AtlasException;
import org.openjena.atlas.data.AbstractDataBag;
@@ -50,7 +52,10 @@ public class MultiThreadedSortedDataBag<
private final SerializationFactory<E> serializationFactory;
private final Comparator<? super E> comparator;
- private ExecutorService pool = Executors.newSingleThreadExecutor();
+ private final ThreadPoolExecutor pool = new ThreadPoolExecutor(1, 1, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>(1));
+ private final RejectedExecutionHandler block = new RejectedExecutionHandler() {
+ public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) { r.run() ; }
+ };
private boolean multithreaded = true ;
protected boolean finishedAdding = false;
@@ -60,9 +65,12 @@ public class MultiThreadedSortedDataBag<
public MultiThreadedSortedDataBag(ThresholdPolicy<E> policy, SerializationFactory<E> serializerFactory, Comparator<? super E> comparator)
{
- this.policy = policy;
- this.serializationFactory = serializerFactory;
- this.comparator = comparator;
+ this.policy = policy ;
+ this.serializationFactory = serializerFactory ;
+ this.comparator = comparator ;
+
+ // this prevents having more than one spiller running and one queued up: a rejected task is run by the submitting thread instead
+ this.pool.setRejectedExecutionHandler(this.block) ;
}
protected void checkClosed()
Modified: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java?rev=1175044&r1=1175043&r2=1175044&view=diff
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java (original)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java Fri Sep 23 21:50:07 2011
@@ -24,7 +24,6 @@ import static com.hp.hpl.jena.tdb.sys.Sy
import java.io.File;
import java.io.UnsupportedEncodingException;
-import java.nio.ByteBuffer;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Iterator;
@@ -38,7 +37,6 @@ import org.openjena.atlas.iterator.Trans
import org.openjena.atlas.lib.Bytes;
import org.openjena.atlas.lib.Pair;
import org.openjena.atlas.lib.Sink;
-import org.openjena.atlas.lib.StrUtils;
import org.openjena.atlas.lib.Tuple;
import org.slf4j.Logger;
@@ -56,8 +54,7 @@ import com.hp.hpl.jena.tdb.base.record.R
import com.hp.hpl.jena.tdb.index.bplustree.BPlusTree;
import com.hp.hpl.jena.tdb.index.bplustree.BPlusTreeParams;
import com.hp.hpl.jena.tdb.index.bplustree.BPlusTreeRewriter;
-import com.hp.hpl.jena.tdb.nodetable.Nodec;
-import com.hp.hpl.jena.tdb.nodetable.NodecSSE;
+import com.hp.hpl.jena.tdb.lib.NodeLib;
import com.hp.hpl.jena.tdb.solver.stats.StatsCollectorNodeId;
import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
import com.hp.hpl.jena.tdb.store.Hash;
@@ -182,7 +179,7 @@ public class NodeTableBuilder2 implement
curr = leftIn ;
// generate the node id
Node node = tdbloader2.parse(leftIn) ;
- id = encodeStore(node, objects) ;
+ id = NodeLib.encodeStore(node, objects) ;
// add to hash|id
Hash hash = new Hash(SystemTDB.LenNodeHash);
setHash(hash, node);
@@ -206,33 +203,6 @@ public class NodeTableBuilder2 implement
}
}
- private static Nodec nodec = new NodecSSE() ;
- final private static char MarkerChar = '_' ;
- final private static char[] invalidIRIChars = { MarkerChar , ' ' } ;
- private long encodeStore (Node node, ObjectFile objects) {
- int maxSize = nodec.maxSize(node) ;
- ByteBuffer bb = objects.allocWrite(maxSize) ;
-
- if ( node.isURI() )
- {
- // Pesky spaces etc
- String x = StrUtils.encodeHex(node.getURI(), MarkerChar, invalidIRIChars) ;
- if ( x != node.getURI() )
- node = Node.createURI(x) ;
- }
-
- // Node->String
- String str = tdbloader2.serialize(node) ;
- // String -> bytes
- int x = Bytes.toByteBuffer(str, bb) ;
- bb.position(0) ; // Around the space used
- bb.limit(x) ; // The space we have used.
-
- long id = objects.completeWrite(bb) ;
- return id ;
- }
-
-
private void generateSortedHashNodeIdDataBag() {
try {
log.info("Node Table (2/3): generating input data using node ids...") ;