You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by ca...@apache.org on 2011/09/23 23:50:07 UTC

svn commit: r1175044 - in /incubator/jena/Scratch/PC/tdbloader2/trunk: TODO src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java

Author: castagna
Date: Fri Sep 23 21:50:07 2011
New Revision: 1175044

URL: http://svn.apache.org/viewvc?rev=1175044&view=rev
Log:
JENA-117

Added:
    incubator/jena/Scratch/PC/tdbloader2/trunk/TODO
Modified:
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/TODO
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/TODO?rev=1175044&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/TODO (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/TODO Fri Sep 23 21:50:07 2011
@@ -0,0 +1,10 @@
+TODO
+----
+
+ - Support N3, TURTLE, RDF/XML, etc... not only N-Triples | N-Quads.
+
+ - A better SpillSortIterator when there are many files. (Also, make sure
+   to avoid many open files...)
+
+ - Use ThresholdPolicyMemory instead of ThresholdPolicyCount (this needs
+   size-in-bytes estimates).

Modified: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java?rev=1175044&r1=1175043&r2=1175044&view=diff
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java (original)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java Fri Sep 23 21:50:07 2011
@@ -32,8 +32,10 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.NoSuchElementException;
 import java.util.PriorityQueue;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.RejectedExecutionHandler;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
 
 import org.openjena.atlas.AtlasException;
 import org.openjena.atlas.data.AbstractDataBag;
@@ -50,7 +52,10 @@ public class MultiThreadedSortedDataBag<
     private final SerializationFactory<E> serializationFactory;
     private final Comparator<? super E> comparator;
     
-    private ExecutorService pool = Executors.newSingleThreadExecutor();
+    private final ThreadPoolExecutor pool = new ThreadPoolExecutor(1, 1, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>(1));
+    private final RejectedExecutionHandler block = new RejectedExecutionHandler() {
+        public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) { r.run() ; }
+    };
     private boolean multithreaded = true ;
     
     protected boolean finishedAdding = false;
@@ -60,9 +65,12 @@ public class MultiThreadedSortedDataBag<
     
     public MultiThreadedSortedDataBag(ThresholdPolicy<E> policy, SerializationFactory<E> serializerFactory, Comparator<? super E> comparator)
     {
-        this.policy = policy;
-        this.serializationFactory = serializerFactory;
-        this.comparator = comparator;
+        this.policy = policy ;
+        this.serializationFactory = serializerFactory ;
+        this.comparator = comparator ;
+        
+        // this prevents having more than one spiller running and one queued up
+        this.pool.setRejectedExecutionHandler(this.block) ;
     }
     
     protected void checkClosed()

Modified: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java?rev=1175044&r1=1175043&r2=1175044&view=diff
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java (original)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java Fri Sep 23 21:50:07 2011
@@ -24,7 +24,6 @@ import static com.hp.hpl.jena.tdb.sys.Sy
 
 import java.io.File;
 import java.io.UnsupportedEncodingException;
-import java.nio.ByteBuffer;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.util.Iterator;
@@ -38,7 +37,6 @@ import org.openjena.atlas.iterator.Trans
 import org.openjena.atlas.lib.Bytes;
 import org.openjena.atlas.lib.Pair;
 import org.openjena.atlas.lib.Sink;
-import org.openjena.atlas.lib.StrUtils;
 import org.openjena.atlas.lib.Tuple;
 import org.slf4j.Logger;
 
@@ -56,8 +54,7 @@ import com.hp.hpl.jena.tdb.base.record.R
 import com.hp.hpl.jena.tdb.index.bplustree.BPlusTree;
 import com.hp.hpl.jena.tdb.index.bplustree.BPlusTreeParams;
 import com.hp.hpl.jena.tdb.index.bplustree.BPlusTreeRewriter;
-import com.hp.hpl.jena.tdb.nodetable.Nodec;
-import com.hp.hpl.jena.tdb.nodetable.NodecSSE;
+import com.hp.hpl.jena.tdb.lib.NodeLib;
 import com.hp.hpl.jena.tdb.solver.stats.StatsCollectorNodeId;
 import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
 import com.hp.hpl.jena.tdb.store.Hash;
@@ -182,7 +179,7 @@ public class NodeTableBuilder2 implement
                     curr = leftIn ;
                     // generate the node id
                     Node node = tdbloader2.parse(leftIn) ;
-                    id = encodeStore(node, objects) ;
+                    id = NodeLib.encodeStore(node, objects) ;
                     // add to hash|id
                     Hash hash = new Hash(SystemTDB.LenNodeHash);
                     setHash(hash, node);
@@ -206,33 +203,6 @@ public class NodeTableBuilder2 implement
         }
     }
     
-    private static Nodec nodec = new NodecSSE() ;
-    final private static char MarkerChar = '_' ;
-    final private static char[] invalidIRIChars = { MarkerChar , ' ' } ; 
-    private long encodeStore (Node node, ObjectFile objects) {
-        int maxSize = nodec.maxSize(node) ;
-        ByteBuffer bb = objects.allocWrite(maxSize) ;
-        
-        if ( node.isURI() ) 
-        {
-            // Pesky spaces etc
-            String x = StrUtils.encodeHex(node.getURI(), MarkerChar, invalidIRIChars) ;
-            if ( x != node.getURI() )
-                node = Node.createURI(x) ; 
-        }
-        
-        // Node->String
-        String str = tdbloader2.serialize(node) ;
-        // String -> bytes
-        int x = Bytes.toByteBuffer(str, bb) ;
-        bb.position(0) ;        // Around the space used
-        bb.limit(x) ;           // The space we have used.
-
-        long id = objects.completeWrite(bb) ;
-        return id ;
-    }
-    
-    
     private void generateSortedHashNodeIdDataBag() {
         try {
             log.info("Node Table (2/3): generating input data using node ids...") ;