You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by ca...@apache.org on 2011/09/23 17:34:50 UTC

svn commit: r1174838 - in /incubator/jena/Scratch/PC/tdbloader2/trunk/src: main/java/cmd/ main/java/org/apache/jena/tdbloader2/ test/java/cmd/ test/resources/test-01/ test/resources/test-02/ test/resources/test-03/ test/resources/test-04/

Author: castagna
Date: Fri Sep 23 15:34:49 2011
New Revision: 1174838

URL: http://svn.apache.org/viewvc?rev=1174838&view=rev
Log:
JENA-117

Blank node labels are still an issue (something is not working properly)... and things need to be cleaned up.
The aim so far was to quickly have something running end-to-end to prove feasibility.

Added:
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/CustomLabelToNode.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairComparator.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairInputStream.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairOutputStream.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairSerializationFactory.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/QuadSerializationFactory.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TripleSerializationFactory.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleComparator.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleInputStream.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleOutputStream.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/java/cmd/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/java/cmd/TestTDBLoader2.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-01/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-01/data.nq
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-01/data.nt
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-02/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-02/data.nq
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-03/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-03/data.nt
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-04/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-04/data-01.nt
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-04/data-02.nt
Modified:
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/cmd/tdbloader2.java
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/ProgressLogger.java

Modified: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/cmd/tdbloader2.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/cmd/tdbloader2.java?rev=1174838&r1=1174837&r2=1174838&view=diff
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/cmd/tdbloader2.java (original)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/cmd/tdbloader2.java Fri Sep 23 15:34:49 2011
@@ -19,52 +19,53 @@
 package cmd;
 
 import static com.hp.hpl.jena.sparql.util.Utils.nowAsString;
-import static com.hp.hpl.jena.tdb.lib.NodeLib.setHash;
-import static com.hp.hpl.jena.tdb.sys.SystemTDB.LenNodeHash;
 import static com.hp.hpl.jena.tdb.sys.SystemTDB.SizeOfLong;
-import static com.hp.hpl.jena.tdb.sys.SystemTDB.SizeOfNodeId;
 
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
-import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.StringWriter;
-import java.io.UnsupportedEncodingException;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
 import java.util.Comparator;
 import java.util.Iterator;
 import java.util.List;
 import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;
 
-import org.apache.commons.codec.binary.Hex;
+import org.apache.jena.tdbloader2.CustomLabelToNode;
 import org.apache.jena.tdbloader2.MultiThreadedSortedDataBag;
+import org.apache.jena.tdbloader2.NodeTableBuilder2;
 import org.apache.jena.tdbloader2.ProgressLogger;
+import org.apache.jena.tdbloader2.QuadSerializationFactory;
+import org.apache.jena.tdbloader2.TripleSerializationFactory;
+import org.apache.jena.tdbloader2.TupleComparator;
 import org.openjena.atlas.AtlasException;
 import org.openjena.atlas.data.DataBag;
-import org.openjena.atlas.data.SerializationFactory;
 import org.openjena.atlas.data.ThresholdPolicyCount;
 import org.openjena.atlas.io.IO;
 import org.openjena.atlas.iterator.Iter;
 import org.openjena.atlas.iterator.Transform;
 import org.openjena.atlas.lib.Bytes;
-import org.openjena.atlas.lib.Closeable;
 import org.openjena.atlas.lib.ColumnMap;
 import org.openjena.atlas.lib.FileOps;
-import org.openjena.atlas.lib.Pair;
 import org.openjena.atlas.lib.Sink;
 import org.openjena.atlas.lib.Tuple;
 import org.openjena.atlas.logging.Log;
+import org.openjena.riot.ErrorHandlerFactory;
 import org.openjena.riot.Lang;
 import org.openjena.riot.RiotLoader;
+import org.openjena.riot.lang.LabelToNode;
+import org.openjena.riot.lang.LangNQuads;
+import org.openjena.riot.lang.LangNTriples;
 import org.openjena.riot.out.NodeToLabel;
 import org.openjena.riot.out.OutputLangUtils;
+import org.openjena.riot.system.IRIResolver;
 import org.openjena.riot.system.ParserProfile;
+import org.openjena.riot.system.ParserProfileBase;
+import org.openjena.riot.system.Prologue;
 import org.openjena.riot.system.RiotLib;
 import org.openjena.riot.system.SinkExtendTriplesToQuads;
 import org.openjena.riot.tokens.Token;
@@ -84,23 +85,15 @@ import com.hp.hpl.jena.sparql.util.Utils
 import com.hp.hpl.jena.tdb.TDB;
 import com.hp.hpl.jena.tdb.base.block.BlockMgr;
 import com.hp.hpl.jena.tdb.base.block.BlockMgrFactory;
-import com.hp.hpl.jena.tdb.base.file.FileFactory;
 import com.hp.hpl.jena.tdb.base.file.FileSet;
 import com.hp.hpl.jena.tdb.base.file.Location;
-import com.hp.hpl.jena.tdb.base.objectfile.ObjectFile;
 import com.hp.hpl.jena.tdb.base.record.Record;
 import com.hp.hpl.jena.tdb.base.record.RecordFactory;
 import com.hp.hpl.jena.tdb.index.bplustree.BPlusTree;
 import com.hp.hpl.jena.tdb.index.bplustree.BPlusTreeParams;
 import com.hp.hpl.jena.tdb.index.bplustree.BPlusTreeRewriter;
-import com.hp.hpl.jena.tdb.lib.NodeLib;
-import com.hp.hpl.jena.tdb.nodetable.NodeTable;
-import com.hp.hpl.jena.tdb.nodetable.NodeTupleTable;
 import com.hp.hpl.jena.tdb.solver.stats.Stats;
-import com.hp.hpl.jena.tdb.solver.stats.StatsCollectorNodeId;
 import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
-import com.hp.hpl.jena.tdb.store.Hash;
-import com.hp.hpl.jena.tdb.store.NodeId;
 import com.hp.hpl.jena.tdb.store.bulkloader.BulkLoader;
 import com.hp.hpl.jena.tdb.sys.Names;
 import com.hp.hpl.jena.tdb.sys.SetupTDB;
@@ -125,13 +118,15 @@ public class tdbloader2 extends CmdGener
     private static boolean compression ;
     private static boolean gzip_outside = false ;
     private static int buffer_size = 8192 ;
-    private static int spill_size = 1000000 ;
-    private static boolean no_stats = false ;
+    public static int spill_size = 1000000 ;
+    public static boolean no_stats = false ;
     private static boolean no_buffer = false ;
     
     private ThresholdPolicyCount<Tuple<Long>> policy ;
     private Comparator<Tuple<Long>> comparator = new TupleComparator();
     
+    private static String runId = String.valueOf(System.currentTimeMillis());
+    
     public static void main(String...argv)
     {
         CmdTDB.setLogging() ;
@@ -210,11 +205,16 @@ public class tdbloader2 extends CmdGener
                 cmdLog.info("Load: "+filename+" -- "+Utils.nowAsString()) ;
             
             InputStream in = IO.openFile(filename) ;
+            Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(in) ;
+            ParserProfile profile = createParserProfile(runId, filename);
             Lang lang = Lang.guess(filename, Lang.NQUADS) ;
-            if ( lang.isTriples() )
-                RiotLoader.readTriples(in, lang, null, sink2) ;
-            else
-                RiotLoader.readQuads(in, lang, null, sink) ;
+            if ( lang.isTriples() ) {
+                LangNTriples parser = new LangNTriples(tokenizer, profile, sink2) ;
+                parser.parse() ;
+            } else {
+                LangNQuads parser = new LangNQuads(tokenizer, profile, sink) ;
+                parser.parse() ;
+            }
             IO.close(in) ; // TODO: final {}
         }
         sink.close() ;
@@ -255,7 +255,7 @@ public class tdbloader2 extends CmdGener
         print ( monitorTotal ) ;
     }
     
-    private static void print ( ProgressLogger monitor ) {
+    public static void print ( ProgressLogger monitor ) {
         long time = monitor.finish() ;
 
         long total = monitor.getTicks() ;
@@ -353,285 +353,28 @@ public class tdbloader2 extends CmdGener
         FileOps.delete(location.absolute(indexName, Names.bptExt2)) ;
     }
 
-    static class NodeTableBuilder implements Sink<Quad>
-    {
-        private NodeTable nodeTable ;
-        private DataBag<Tuple<Long>> outputTriples ;
-        private DataBag<Tuple<Long>> outputQuads ;
-        private ProgressLogger monitor ;
-        private StatsCollectorNodeId stats ;
-
-        NodeTableBuilder(DatasetGraphTDB dsg, ProgressLogger monitor, DataBag<Tuple<Long>> outputTriples, DataBag<Tuple<Long>> outputQuads)
-        {
-            this.monitor = monitor ;
-            NodeTupleTable ntt = dsg.getTripleTable().getNodeTupleTable() ; 
-            this.nodeTable = ntt.getNodeTable() ;
-            this.outputTriples = outputTriples ; 
-            this.outputQuads = outputQuads ; 
-            this.stats = new StatsCollectorNodeId() ;
-        }
-        
-        @Override
-        public void send(Quad quad)
-        {
-            Node s = quad.getSubject() ;
-            Node p = quad.getPredicate() ;
-            Node o = quad.getObject() ;
-            Node g = null ;
-            // Union graph?!
-            if ( ! quad.isTriple() && ! quad.isDefaultGraph() )
-                g = quad.getGraph() ;
-            
-            NodeId sId = nodeTable.getAllocateNodeId(s) ; 
-            NodeId pId = nodeTable.getAllocateNodeId(p) ;
-            NodeId oId = nodeTable.getAllocateNodeId(o) ;
-            
-            if ( g != null )
-            {
-                NodeId gId = nodeTable.getAllocateNodeId(g) ;
-                outputQuads.send(Tuple.create(gId.getId(), sId.getId(), pId.getId(), oId.getId())) ;
-                if ( !no_stats ) stats.record(gId, sId, pId, oId) ;
-            }
-            else
-            {
-                outputTriples.send(Tuple.create(sId.getId(), pId.getId(), oId.getId())) ;
-                if ( !no_stats ) stats.record(null, sId, pId, oId) ;
-            }
-            monitor.tick() ;
-        }
-
-        @Override
-        public void flush()
-        {
-            outputTriples.flush() ;
-            outputQuads.flush() ;
-            nodeTable.sync() ;
-        }
-
-        @Override
-        public void close() { 
-            flush() ;
-        }
-        
-        public StatsCollectorNodeId getCollector() { return stats ; }
-    }
-    
-    static class NodeTableBuilder2 implements Sink<Quad>
-    {
-        private DatasetGraphTDB dsg ;
-        private ObjectFile objects ;
-        private DataBag<Tuple<Long>> outputTriples ;
-        private DataBag<Tuple<Long>> outputQuads ;
-        private ProgressLogger monitor ;
-        private StatsCollectorNodeId stats ;
-        
-        private ThresholdPolicyCount<Pair<byte[], byte[]>> policy = new ThresholdPolicyCount<Pair<byte[], byte[]>>(spill_size) ;
-        private DataBag<Pair<byte[], byte[]>> sdb01 ;
-        private DataBag<Pair<byte[], byte[]>> sdb02 ;
-        private DataBag<Pair<byte[], byte[]>> sdb03 ;
-        
-        private MessageDigest digest ;
-
-        NodeTableBuilder2(DatasetGraphTDB dsg, ProgressLogger monitor, DataBag<Tuple<Long>> outputTriples, DataBag<Tuple<Long>> outputQuads)
-        {
-//            dsg.getTripleTable().getNodeTupleTable().close() ; 
-
-            this.dsg = dsg ;
-            this.monitor = monitor ;
-
-            String filename = new FileSet(dsg.getLocation(), Names.indexId2Node).filename(Names.extNodeData) ;
-            this.objects = FileFactory.createObjectFileDisk(filename) ; 
-            
-            this.outputTriples = outputTriples ; 
-            this.outputQuads = outputQuads ; 
-            this.stats = new StatsCollectorNodeId() ;
-            
-            this.sdb01 = new MultiThreadedSortedDataBag<Pair<byte[], byte[]>>(policy, new PairSerializationFactory(), new PairComparator());
-            this.sdb02 = new MultiThreadedSortedDataBag<Pair<byte[], byte[]>>(policy, new PairSerializationFactory(), new PairComparator());
-            this.sdb03 = new MultiThreadedSortedDataBag<Pair<byte[], byte[]>>(policy, new PairSerializationFactory(), new PairComparator());
-            
-            try {
-                this.digest = MessageDigest.getInstance("MD5") ;               
-            } catch (NoSuchAlgorithmException e) {
-                throw new AtlasException(e) ;
-            }
-
-        }
-        
-        @Override
-        public void send(Quad quad)
-        {
-            Node s = quad.getSubject() ;
-            Node p = quad.getPredicate() ;
-            Node o = quad.getObject() ;
-            Node g = null ;
-            // Union graph?!
-            if ( ! quad.isTriple() && ! quad.isDefaultGraph() )
-                g = quad.getGraph() ;
-            
-            try {
-                digest.reset() ;
-                digest.update(s.toString().getBytes("UTF-8")) ; // TODO: should we do something better here?
-                digest.update(p.toString().getBytes("UTF-8")) ;
-                digest.update(o.toString().getBytes("UTF-8")) ;
-                if ( g != null )
-                    digest.update(g.toString().getBytes("UTF-8")) ;
-
-                String md5 = new String(Hex.encodeHex(digest.digest())) ;
-                sdb01.add(new Pair<byte[], byte[]>(serialize(s).getBytes("UTF-8"), (md5 + "|s").getBytes("UTF-8"))) ;
-                sdb01.add(new Pair<byte[], byte[]>(serialize(p).getBytes("UTF-8"), (md5 + "|p").getBytes("UTF-8"))) ;
-                sdb01.add(new Pair<byte[], byte[]>(serialize(o).getBytes("UTF-8"), (md5 + "|o").getBytes("UTF-8"))) ;
-                if ( g != null )
-                    sdb01.add(new Pair<byte[], byte[]>(serialize(g).getBytes("UTF-8"), (md5 + "|g").getBytes("UTF-8"))) ;
-            } catch (UnsupportedEncodingException e) {
-                throw new AtlasException(e) ;
-            }
-
-            monitor.tick() ;
-        }
-
-        @Override
-        public void flush()
-        {
-            // TODO
-        }
-
-        @Override
-        public void close() { 
-        	flush() ;
-        	
-            try {
-                cmdLog.info("Node Table (1/3): building nodes.dat and sorting hash|id ...") ;
-                ProgressLogger monitor01 = new ProgressLogger(cmdLog, "records for node table (1/3) phase", BulkLoader.DataTickPoint,BulkLoader.superTick) ;
-                monitor01.start() ;
-                String curr = null ;
-                long id = -1L;
-                Iterator<Pair<byte[], byte[]>> iter01 = sdb01.iterator() ;
-                while ( iter01.hasNext() ) {
-                    Pair<byte[], byte[]> pair01 = iter01.next() ;
-                    String leftIn = new String(pair01.getLeft(), "UTF-8") ;
-                    String rightIn = new String(pair01.getRight(), "UTF-8") ;
-                    if ( ! leftIn.equals(curr) ) {
-                        curr = leftIn ;
-                        // generate the node id
-                        Node node = parse(leftIn) ;
-                        id = NodeLib.encodeStore(node, objects) ;
-                        // add to hash|id
-                        Hash hash = new Hash(SystemTDB.LenNodeHash);
-                        setHash(hash, node);
-                        sdb03.add (new Pair<byte[], byte[]>(hash.getBytes(), Bytes.packLong(id))) ;
-                    }
-//                    System.out.println ("< ( " + leftIn + ", " + rightIn + " )") ;
-                    String tokens[] = rightIn.split("\\|") ;
-                    String leftOut = tokens[0] ;
-                    String rightOut = id + "|" + tokens[1] ;
-//                    System.out.println ("> ( " + leftOut + ", " + rightOut + " )") ;
-                    Pair<byte[], byte[]> pair02 = new Pair<byte[], byte[]>(leftOut.getBytes("UTF-8"), rightOut.getBytes("UTF-8")) ;
-                    sdb02.add(pair02) ;
-                    monitor01.tick() ;
-                }
-                sdb01.close() ;
-                sdb01 = null ;
-                print ( monitor01 ) ;
-                
-                cmdLog.info("Node Table (2/3): generating input data using node ids...") ;
-                final ProgressLogger monitor02 = new ProgressLogger(cmdLog, "records for node table (2/3) phase", BulkLoader.DataTickPoint,BulkLoader.superTick) ;
-                monitor02.start() ;
-                Iterator<Pair<byte[], byte[]>> iter02 = sdb02.iterator() ;
-                curr = null ;
-                Long s = null ;
-                Long p = null ;
-                Long o = null ;
-                Long g = null ;
-                while ( iter02.hasNext() ) {
-                    Pair<byte[], byte[]> pair02 = iter02.next() ;
-                    String leftIn = new String(pair02.getLeft(), "UTF-8") ;
-                    String rightIn = new String(pair02.getRight(), "UTF-8") ;
-//                    System.out.println ("< ( " + leftIn + ", " + rightIn + " )") ;
-                    if ( curr == null ) curr = leftIn ;
-                    if ( ! leftIn.equals(curr) ) {
-                        curr = leftIn ;
-                        write (g, s, p, o) ;
-                        s = null ;
-                        p = null ;
-                        o = null ;
-                        g = null ;
-                        monitor02.tick() ;
-                    }
-                    String tokens[] = rightIn.split("\\|") ;
-                    if ( "s".equals(tokens[1]) ) s = Long.parseLong(tokens[0]) ;
-                    else if ( "p".equals(tokens[1]) ) p = Long.parseLong(tokens[0]) ;
-                    else if ( "o".equals(tokens[1]) ) o = Long.parseLong(tokens[0]) ;
-                    else if ( "g".equals(tokens[1]) ) g = Long.parseLong(tokens[0]) ;
-                    
-                }
-                write (g, s, p, o) ; // ensure we write the last triple|quad
-                sdb02.close() ;
-                sdb02 = null ;
-                print ( monitor02 ) ;
-            } catch (UnsupportedEncodingException e) {
-                throw new AtlasException(e) ;
-            }
-            
-            
-            // Node table B+Tree index (i.e. node2id.dat/idn)
-            cmdLog.info("Node Table (3/3): building node table B+Tree index (i.e. node2id.dat and node2id.idn files)...") ;
-            final ProgressLogger monitor03 = new ProgressLogger(cmdLog, "records for node table (3/3) phase", BulkLoader.DataTickPoint,BulkLoader.superTick) ;
-            monitor03.start() ;
-            String path = dsg.getLocation().getDirectoryPath() ;
-            new File(path, "node2id.dat").delete() ;
-            new File(path, "node2id.idn").delete() ;
-            
-            final RecordFactory recordFactory = new RecordFactory(LenNodeHash, SizeOfNodeId) ;
-            Transform<Pair<byte[], byte[]>, Record> transformPair2Record = new Transform<Pair<byte[], byte[]>, Record>() {
-                @Override public Record convert(Pair<byte[], byte[]> pair) {
-                    monitor03.tick() ;
-                    return recordFactory.create(pair.getLeft(), pair.getRight()) ;
-                }
-            };
 
-            int order = BPlusTreeParams.calcOrder(SystemTDB.BlockSize, recordFactory) ;
-            BPlusTreeParams bptParams = new BPlusTreeParams(order, recordFactory) ;
-            int readCacheSize = 10 ;
-            int writeCacheSize = 100 ;
-            FileSet destination = new FileSet(dsg.getLocation(), Names.indexNode2Id) ;
-            BlockMgr blkMgrNodes = BlockMgrFactory.create(destination, Names.bptExt1, SystemTDB.BlockSize, readCacheSize, writeCacheSize) ;
-            BlockMgr blkMgrRecords = BlockMgrFactory.create(destination, Names.bptExt2, SystemTDB.BlockSize, readCacheSize, writeCacheSize) ;
-            Iterator<Record> iter2 = Iter.iter(sdb03.iterator()).map(transformPair2Record) ;
-            BPlusTree bpt2 = BPlusTreeRewriter.packIntoBPlusTree(iter2, bptParams, recordFactory, blkMgrNodes, blkMgrRecords) ;
-            bpt2.sync() ;
-            sdb03.close() ;
-            sdb03 = null ;
-            print ( monitor03 ) ;
-
-            outputTriples.flush() ;
-            outputQuads.flush() ;
-            objects.sync() ;
-        }
-        
-        public StatsCollectorNodeId getCollector() { return stats ; }
-        
-        private void write (Long g, Long s, Long p, Long o) {
-//            System.out.println ("> ( " + g + ", " + s + ", " + p + ", " + o + " )") ;
-            if ( g != null ) {
-                outputQuads.add(Tuple.create(g, s, p, o)) ;
-                stats.record(new NodeId(g), new NodeId(s), new NodeId(p), new NodeId(o)) ;
-            } else {
-                outputTriples.add(Tuple.create(s, p, o)) ;
-                stats.record(null, new NodeId(s), new NodeId(p), new NodeId(o)) ;
-            }
-        }
-    }
-
-    public static final NodeToLabel nodeToLabel = NodeToLabel.createScopeByDocument();
+    public static final NodeToLabel nodeToLabel = NodeToLabel.createBNodeByLabelAsGiven();
     public static String serialize(Node node) {
         StringWriter out = new StringWriter();
         OutputLangUtils.output(out, node, null, nodeToLabel);
         return out.toString();
     }
     
-    private static Node parse(String string) {
-        ParserProfile profile = RiotLib.profile(Lang.NQUADS, null, null) ;
+    private static ParserProfile createParserProfile(String runId, String filename) {
+        Prologue prologue = new Prologue(null, IRIResolver.createNoResolve()); 
+        LabelToNode labelMapping = new CustomLabelToNode(runId, filename);
+        return new ParserProfileBase(prologue, ErrorHandlerFactory.errorHandlerStd, labelMapping);
+    }
+
+    private static ParserProfile createParserProfile() {
+        Prologue prologue = new Prologue(null, IRIResolver.createNoResolve()); 
+        LabelToNode labelMapping = LabelToNode.createUseLabelAsGiven() ;
+        return new ParserProfileBase(prologue, ErrorHandlerFactory.errorHandlerStd, labelMapping);
+    }
+    
+    private static ParserProfile profile = createParserProfile();
+    public static Node parse(String string) {
         Tokenizer tokenizer = TokenizerFactory.makeTokenizerString(string) ;
         if ( ! tokenizer.hasNext() )
             return null ;
@@ -654,231 +397,7 @@ public class tdbloader2 extends CmdGener
         return this.getClass().getName() ;
     }
 
-    class TripleSerializationFactory implements SerializationFactory<Tuple<Long>> {
-        @Override public Iterator<Tuple<Long>> createDeserializer(InputStream in) { return new TupleInputStream(in, 3); }
-        @Override public Sink<Tuple<Long>> createSerializer(OutputStream out) { return new TupleOutputStream(out); }
-        @Override public long getEstimatedMemorySize(Tuple<Long> item) { throw new AtlasException("Method not implemented.") ; }
-    }
-
-    class QuadSerializationFactory implements SerializationFactory<Tuple<Long>> {
-        @Override public Iterator<Tuple<Long>> createDeserializer(InputStream in) { return new TupleInputStream(in, 4); }
-        @Override public Sink<Tuple<Long>> createSerializer(OutputStream out) { return new TupleOutputStream(out); }
-        @Override public long getEstimatedMemorySize(Tuple<Long> item) { throw new AtlasException("Method not implemented.") ; }
-    }
-
-    class TupleComparator implements Comparator<Tuple<Long>> {
-        @Override
-        public int compare(Tuple<Long> t1, Tuple<Long> t2) {
-            int size = t1.size();
-            if ( size != t2.size() ) throw new AtlasException("Cannot compare tuple of different sizes.") ;
-            for ( int i = 0; i < size; i++ ) {
-                int result = t1.get(i).compareTo(t2.get(i)) ;
-                if ( result != 0 ) {
-                    return result ;
-                }
-            }
-            return 0;
-        }
-    }
-    
-    class TupleInputStream implements Iterator<Tuple<Long>>, Closeable {
-
-        private DataInputStream in ;
-        private int size ;
-        private Tuple<Long> slot = null ;
-        
-        public TupleInputStream(InputStream in, int size) {
-            this.in = createDataInputStream(in) ;
-            this.size = size ;
-            slot = readNext() ;
-        }
-
-        @Override
-        public boolean hasNext() {
-            return slot != null ;
-        }
-
-        @Override
-        public Tuple<Long> next() {
-            Tuple<Long> result = slot ;
-            slot = readNext() ;
-            return result ;
-        }
-        
-        private Tuple<Long> readNext() {
-            try {
-                if ( size == 3 ) {
-                    return Tuple.create(in.readLong(), in.readLong(), in.readLong()) ;
-                } else if ( size == 4 ) {
-                    return Tuple.create(in.readLong(), in.readLong(), in.readLong(), in.readLong()) ;
-                } else {
-                    throw new AtlasException("Unsupported size.") ;
-                }
-            } catch (IOException e) {
-                return null ;
-            }
-        }
-
-        @Override
-        public void remove() {
-            throw new AtlasException("Method not implemented.") ;
-        }
-
-        @Override
-        public void close() {
-            try {
-                in.close() ;
-            } catch (IOException e) {
-                new AtlasException(e) ;
-            }        
-        }
-        
-    }
-    
-    class TupleOutputStream implements Sink<Tuple<Long>> {
-
-        private DataOutputStream out ;
-        
-        public TupleOutputStream(OutputStream out) {
-            this.out = createDataOutputStream(out) ;
-        }
-
-        @Override
-        public void send(Tuple<Long> tuple) {
-            Iterator<Long> iter = tuple.iterator() ;
-            while ( iter.hasNext() ) {
-                try {
-                    out.writeLong( iter.next() ) ;
-                } catch (IOException e) {
-                    new AtlasException(e) ;
-                }
-            }
-        }
-
-        @Override
-        public void flush() {
-            try {
-                out.flush() ;
-            } catch (IOException e) {
-                new AtlasException(e) ;
-            }
-        }
-
-        @Override
-        public void close() {
-            try {
-                out.close() ;
-            } catch (IOException e) {
-                new AtlasException(e) ;
-            }
-        }
-        
-    }
-
-    static class PairSerializationFactory implements SerializationFactory<Pair<byte[], byte[]>> {
-        @Override public Iterator<Pair<byte[], byte[]>> createDeserializer(InputStream in) { return new PairInputStream(in); }
-        @Override public Sink<Pair<byte[], byte[]>> createSerializer(OutputStream out) { return new PairOutputStream(out); }
-        @Override public long getEstimatedMemorySize(Pair<byte[], byte[]> item) { throw new AtlasException("Method not implemented.") ; }
-    }
-
-    static class PairComparator implements Comparator<Pair<byte[], byte[]>> {
-        @Override
-        public int compare(Pair<byte[], byte[]> p1, Pair<byte[], byte[]> p2) {
-            return Bytes.compare(p1.getLeft(), p2.getLeft()) ;
-        }
-    }
-
-    static class PairInputStream implements Iterator<Pair<byte[], byte[]>>, Closeable {
-
-        private DataInputStream in ;
-        private Pair<byte[], byte[]> slot = null ;
-        
-        public PairInputStream(InputStream in) {
-            this.in = createDataInputStream(in) ;
-            slot = readNext() ;
-        }
-
-        @Override
-        public boolean hasNext() {
-            return slot != null ;
-        }
-
-        @Override
-        public Pair<byte[], byte[]> next() {
-            Pair<byte[], byte[]> result = slot ;
-            slot = readNext() ;
-            return result ;
-        }
-        
-        private Pair<byte[], byte[]> readNext() {
-            try {
-                byte left[] = new byte[in.readInt()] ;
-                in.readFully(left) ;
-                byte right[] = new byte[in.readInt()] ;
-                in.readFully(right) ;
-                return new Pair<byte[], byte[]> (left, right) ;
-            } catch (IOException e) {
-                return null ;
-            }
-        }
-
-        @Override
-        public void remove() {
-            throw new AtlasException("Method not implemented.") ;
-        }
-
-        @Override
-        public void close() {
-            try {
-                in.close() ;
-            } catch (IOException e) {
-                new AtlasException(e) ;
-            }        
-        }
-        
-    }
-    
-    static class PairOutputStream implements Sink<Pair<byte[], byte[]>> {
-
-        private DataOutputStream out ;
-        
-        public PairOutputStream(OutputStream out) {
-            this.out = createDataOutputStream(out) ;
-        }
-
-        @Override
-        public void send(Pair<byte[], byte[]> pair) {
-            try {
-                out.writeInt(pair.getLeft().length) ;
-                out.write(pair.getLeft()) ;
-                out.writeInt(pair.getRight().length) ;
-                out.write(pair.getRight()) ;
-            } catch (IOException e) {
-                new AtlasException(e) ;
-            }
-        }
-
-        @Override
-        public void flush() {
-            try {
-                out.flush() ;
-            } catch (IOException e) {
-                new AtlasException(e) ;
-            }
-        }
-
-        @Override
-        public void close() {
-            try {
-                out.close() ;
-            } catch (IOException e) {
-                new AtlasException(e) ;
-            }
-        }
-        
-    }
-
-    private static DataOutputStream createDataOutputStream(OutputStream out) {
+    public static DataOutputStream createDataOutputStream(OutputStream out) {
         try {
             if ( no_buffer ) {
                 return new DataOutputStream( compression ? new GZIPOutputStream(out) : out ) ;
@@ -895,7 +414,7 @@ public class tdbloader2 extends CmdGener
         }
     }
     
-    private static DataInputStream createDataInputStream(InputStream in) {
+    public static DataInputStream createDataInputStream(InputStream in) {
         try {
             if ( no_buffer ) {
                 return new DataInputStream( compression ? new GZIPInputStream(in) : in ) ;

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/CustomLabelToNode.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/CustomLabelToNode.java?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/CustomLabelToNode.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/CustomLabelToNode.java Fri Sep 23 15:34:49 2011
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.tdbloader2;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.openjena.riot.lang.LabelToNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.rdf.model.AnonId;
+
+/**
+ * A {@link LabelToNode} that keeps every label in one shared map and allocates
+ * blank nodes whose internal label is built from the hash codes of the run id
+ * and of the file name, followed by the label found in the input.
+ */
+public class CustomLabelToNode extends LabelToNode {
+
+    // Log under this class's own category rather than the parent's.
+    private static final Logger log = LoggerFactory.getLogger(CustomLabelToNode.class);
+
+    /**
+     * @param runId    identifier whose hash code becomes part of every generated label
+     * @param filename file name whose hash code becomes part of every generated label
+     */
+    public CustomLabelToNode(String runId, String filename) {
+        super(new SingleScopePolicy(), new LabelAllocator(runId, filename));
+    }
+
+    /** Scope policy that ignores the scope argument and always hands back the same map. */
+    private static class SingleScopePolicy implements ScopePolicy<String, Node, Node> {
+        private final Map<String, Node> map = new HashMap<String, Node>() ;
+        @Override public Map<String, Node> getScope(Node scope) { return map ; }
+        @Override public void clear() { map.clear(); }
+    }
+
+    /** Allocator producing blank nodes labelled {@code mrbnode_<runId hash>_<filename hash>_<label>}. */
+    private static class LabelAllocator implements Allocator<String, Node> {
+
+        private final String runId ;
+        private final String filename ;
+
+        public LabelAllocator (String runId, String filename) {
+            // This is to ensure that blank node allocation policy is constant when subsequent processing happens
+            this.runId = runId ;
+            this.filename = filename ;
+            log.debug("LabelAllocator({}, {})", runId, filename) ;
+        }
+
+        @Override
+        public Node create(String label) {
+            String strLabel = "mrbnode_" + runId.hashCode() + "_" + filename.hashCode() + "_" + label;
+            log.debug ("create({}) = {}", label, strLabel);
+            return Node.createAnon(new AnonId(strLabel)) ;
+        }
+
+        /** Nothing to reset: the only state is the two constructor arguments. */
+        @Override public void reset() {}
+    }
+
+}

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/CustomLabelToNode.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder.java?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder.java Fri Sep 23 15:34:49 2011
@@ -0,0 +1,85 @@
+package org.apache.jena.tdbloader2;
+
+import org.openjena.atlas.data.DataBag;
+import org.openjena.atlas.lib.Sink;
+import org.openjena.atlas.lib.Tuple;
+
+import cmd.tdbloader2;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.sparql.core.Quad;
+import com.hp.hpl.jena.tdb.nodetable.NodeTable;
+import com.hp.hpl.jena.tdb.nodetable.NodeTupleTable;
+import com.hp.hpl.jena.tdb.solver.stats.StatsCollectorNodeId;
+import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
+import com.hp.hpl.jena.tdb.store.NodeId;
+
+/**
+ * Sink that turns each incoming quad into a tuple of node ids: ids are obtained from
+ * the dataset's node table and the tuple goes to the triple or the quad output bag.
+ */
+public class NodeTableBuilder implements Sink<Quad>
+{
+    private NodeTable nodeTable ;
+    private DataBag<Tuple<Long>> outputTriples ;
+    private DataBag<Tuple<Long>> outputQuads ;
+    private ProgressLogger monitor ;
+    private StatsCollectorNodeId stats ;
+
+    NodeTableBuilder(DatasetGraphTDB dsg, ProgressLogger monitor, DataBag<Tuple<Long>> outputTriples, DataBag<Tuple<Long>> outputQuads)
+    {
+        // The node table comes from the triple table's node tuple table.
+        NodeTupleTable tripleNodeTuples = dsg.getTripleTable().getNodeTupleTable() ;
+        this.nodeTable = tripleNodeTuples.getNodeTable() ;
+        this.monitor = monitor ;
+        this.stats = new StatsCollectorNodeId() ;
+        this.outputTriples = outputTriples ;
+        this.outputQuads = outputQuads ;
+    }
+
+    /** Allocates node ids for the quad's terms, emits the id tuple and ticks the monitor. */
+    @Override
+    public void send(Quad quad)
+    {
+        // Union graph?!
+        boolean inNamedGraph = ! ( quad.isTriple() || quad.isDefaultGraph() ) ;
+        Node graph = inNamedGraph ? quad.getGraph() : null ;
+
+        // Ids are requested in the order subject, predicate, object and, last, graph.
+        NodeId subjectId = nodeTable.getAllocateNodeId(quad.getSubject()) ;
+        NodeId predicateId = nodeTable.getAllocateNodeId(quad.getPredicate()) ;
+        NodeId objectId = nodeTable.getAllocateNodeId(quad.getObject()) ;
+
+        if ( graph == null )
+        {
+            outputTriples.send(Tuple.create(subjectId.getId(), predicateId.getId(), objectId.getId())) ;
+            if ( !tdbloader2.no_stats ) stats.record(null, subjectId, predicateId, objectId) ;
+        }
+        else
+        {
+            NodeId graphId = nodeTable.getAllocateNodeId(graph) ;
+            outputQuads.send(Tuple.create(graphId.getId(), subjectId.getId(), predicateId.getId(), objectId.getId())) ;
+            if ( !tdbloader2.no_stats ) stats.record(graphId, subjectId, predicateId, objectId) ;
+        }
+        monitor.tick() ;
+    }
+
+    /** Flushes both output bags, then syncs the node table. */
+    @Override
+    public void flush()
+    {
+        outputTriples.flush() ;
+        outputQuads.flush() ;
+        nodeTable.sync() ;
+    }
+
+    /** Closing only performs a {@link #flush()}. */
+    @Override
+    public void close()
+    {
+        flush() ;
+    }
+
+    /** Returns the statistics collector fed by {@link #send(Quad)}. */
+    public StatsCollectorNodeId getCollector() { return stats ; }
+}

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java Fri Sep 23 15:34:49 2011
@@ -0,0 +1,294 @@
+package org.apache.jena.tdbloader2;
+
+import static com.hp.hpl.jena.tdb.lib.NodeLib.setHash;
+import static com.hp.hpl.jena.tdb.sys.SystemTDB.LenNodeHash;
+import static com.hp.hpl.jena.tdb.sys.SystemTDB.SizeOfNodeId;
+
+import java.io.File;
+import java.io.UnsupportedEncodingException;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Iterator;
+
+import org.apache.commons.codec.binary.Hex;
+import org.openjena.atlas.AtlasException;
+import org.openjena.atlas.data.DataBag;
+import org.openjena.atlas.data.ThresholdPolicyCount;
+import org.openjena.atlas.iterator.Iter;
+import org.openjena.atlas.iterator.Transform;
+import org.openjena.atlas.lib.Bytes;
+import org.openjena.atlas.lib.Pair;
+import org.openjena.atlas.lib.Sink;
+import org.openjena.atlas.lib.Tuple;
+import org.slf4j.Logger;
+
+import cmd.tdbloader2;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.sparql.core.Quad;
+import com.hp.hpl.jena.tdb.base.block.BlockMgr;
+import com.hp.hpl.jena.tdb.base.block.BlockMgrFactory;
+import com.hp.hpl.jena.tdb.base.file.FileFactory;
+import com.hp.hpl.jena.tdb.base.file.FileSet;
+import com.hp.hpl.jena.tdb.base.objectfile.ObjectFile;
+import com.hp.hpl.jena.tdb.base.record.Record;
+import com.hp.hpl.jena.tdb.base.record.RecordFactory;
+import com.hp.hpl.jena.tdb.index.bplustree.BPlusTree;
+import com.hp.hpl.jena.tdb.index.bplustree.BPlusTreeParams;
+import com.hp.hpl.jena.tdb.index.bplustree.BPlusTreeRewriter;
+import com.hp.hpl.jena.tdb.lib.NodeLib;
+import com.hp.hpl.jena.tdb.solver.stats.StatsCollectorNodeId;
+import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
+import com.hp.hpl.jena.tdb.store.Hash;
+import com.hp.hpl.jena.tdb.store.NodeId;
+import com.hp.hpl.jena.tdb.store.bulkloader.BulkLoader;
+import com.hp.hpl.jena.tdb.sys.Names;
+import com.hp.hpl.jena.tdb.sys.SystemTDB;
+
+/**
+ * Sink that builds the node table through sorted data bags rather than per-node lookups.
+ * <p>
+ * {@link #send(Quad)} only records (serialized node, "md5|position") pairs; the work is done
+ * in {@link #close()}, which relies on the bags iterating in order of their left value:
+ * <ol>
+ * <li>each distinct node is stored in the object file, and (md5, "id|position") plus
+ *     (node hash, id) pairs are produced;</li>
+ * <li>pairs sharing an md5 are reassembled into one tuple of node ids;</li>
+ * <li>the (node hash, id) pairs are packed into the node-to-id B+Tree.</li>
+ * </ol>
+ */
+public class NodeTableBuilder2 implements Sink<Quad>
+{
+    private DatasetGraphTDB dsg ;
+    private ObjectFile objects ;
+    private DataBag<Tuple<Long>> outputTriples ;
+    private DataBag<Tuple<Long>> outputQuads ;
+    private ProgressLogger monitor ;
+    private StatsCollectorNodeId stats ;
+    
+    private ThresholdPolicyCount<Pair<byte[], byte[]>> policy = new ThresholdPolicyCount<Pair<byte[], byte[]>>(tdbloader2.spill_size) ;
+    private DataBag<Pair<byte[], byte[]>> sdb01 ; // (serialized node, "md5|position")
+    private DataBag<Pair<byte[], byte[]>> sdb02 ; // (md5, "id|position")
+    private DataBag<Pair<byte[], byte[]>> sdb03 ; // (node hash, packed id)
+    
+    private MessageDigest digest ;
+    
+    private final Logger log ;
+
+    /**
+     * Creates the object file for node data under the location of {@code dsg} and the
+     * three sorted bags. Progress messages go to the logger of {@code monitor}.
+     */
+    public NodeTableBuilder2(DatasetGraphTDB dsg, ProgressLogger monitor, DataBag<Tuple<Long>> outputTriples, DataBag<Tuple<Long>> outputQuads)
+    {
+//        dsg.getTripleTable().getNodeTupleTable().close() ; 
+
+        this.dsg = dsg ;
+        this.monitor = monitor ;
+
+        String filename = new FileSet(dsg.getLocation(), Names.indexId2Node).filename(Names.extNodeData) ;
+        this.objects = FileFactory.createObjectFileDisk(filename) ; 
+        
+        this.outputTriples = outputTriples ; 
+        this.outputQuads = outputQuads ; 
+        this.stats = new StatsCollectorNodeId() ;
+        
+        this.sdb01 = new MultiThreadedSortedDataBag<Pair<byte[], byte[]>>(policy, new PairSerializationFactory(), new PairComparator());
+        this.sdb02 = new MultiThreadedSortedDataBag<Pair<byte[], byte[]>>(policy, new PairSerializationFactory(), new PairComparator());
+        this.sdb03 = new MultiThreadedSortedDataBag<Pair<byte[], byte[]>>(policy, new PairSerializationFactory(), new PairComparator());
+        
+        try {
+            this.digest = MessageDigest.getInstance("MD5") ;               
+        } catch (NoSuchAlgorithmException e) {
+            throw new AtlasException(e) ;
+        }
+        
+        this.log = monitor.getLogger() ;
+
+    }
+    
+    /**
+     * Records one pair per term of the quad, keyed by the serialized node and carrying
+     * the MD5 of the whole statement plus the term's position (s, p, o or g).
+     */
+    @Override
+    public void send(Quad quad)
+    {
+        Node s = quad.getSubject() ;
+        Node p = quad.getPredicate() ;
+        Node o = quad.getObject() ;
+        Node g = null ;
+        // Union graph?!
+        if ( ! quad.isTriple() && ! quad.isDefaultGraph() )
+            g = quad.getGraph() ;
+        
+        try {
+            digest.reset() ;
+            digest.update(s.toString().getBytes("UTF-8")) ; // TODO: should we do something better here?
+            digest.update(p.toString().getBytes("UTF-8")) ;
+            digest.update(o.toString().getBytes("UTF-8")) ;
+            if ( g != null )
+                digest.update(g.toString().getBytes("UTF-8")) ;
+
+            String md5 = new String(Hex.encodeHex(digest.digest())) ;
+            sdb01.add(new Pair<byte[], byte[]>(tdbloader2.serialize(s).getBytes("UTF-8"), (md5 + "|s").getBytes("UTF-8"))) ;
+            sdb01.add(new Pair<byte[], byte[]>(tdbloader2.serialize(p).getBytes("UTF-8"), (md5 + "|p").getBytes("UTF-8"))) ;
+            sdb01.add(new Pair<byte[], byte[]>(tdbloader2.serialize(o).getBytes("UTF-8"), (md5 + "|o").getBytes("UTF-8"))) ;
+            if ( g != null )
+                sdb01.add(new Pair<byte[], byte[]>(tdbloader2.serialize(g).getBytes("UTF-8"), (md5 + "|g").getBytes("UTF-8"))) ;
+        } catch (UnsupportedEncodingException e) {
+            throw new AtlasException(e) ;
+        }
+
+        monitor.tick() ;
+    }
+
+    @Override
+    public void flush()
+    {
+        // TODO
+    }
+
+    /**
+     * Runs the three phases that build nodes.dat, the id tuples and the node2id B+Tree,
+     * then flushes the output bags and syncs the object file. When nothing was sent,
+     * no tuple is written.
+     */
+    @Override
+    public void close() { 
+        flush() ;
+        
+        try {
+            log.info("Node Table (1/3): building nodes.dat and sorting hash|id ...") ;
+            ProgressLogger monitor01 = new ProgressLogger(log, "records for node table (1/3) phase", BulkLoader.DataTickPoint,BulkLoader.superTick) ;
+            monitor01.start() ;
+            String curr = null ;
+            long id = -1L;
+            Iterator<Pair<byte[], byte[]>> iter01 = sdb01.iterator() ;
+            while ( iter01.hasNext() ) {
+                Pair<byte[], byte[]> pair01 = iter01.next() ;
+                String leftIn = new String(pair01.getLeft(), "UTF-8") ;
+                String rightIn = new String(pair01.getRight(), "UTF-8") ;
+                if ( ! leftIn.equals(curr) ) {
+                    curr = leftIn ;
+                    // generate the node id
+                    Node node = tdbloader2.parse(leftIn) ;
+                    id = NodeLib.encodeStore(node, objects) ;
+                    // add to hash|id
+                    Hash hash = new Hash(SystemTDB.LenNodeHash);
+                    setHash(hash, node);
+                    sdb03.add (new Pair<byte[], byte[]>(hash.getBytes(), Bytes.packLong(id))) ;
+                }
+//                System.out.println ("< ( " + leftIn + ", " + rightIn + " )") ;
+                String tokens[] = rightIn.split("\\|") ;
+                String leftOut = tokens[0] ;
+                String rightOut = id + "|" + tokens[1] ;
+//                System.out.println ("> ( " + leftOut + ", " + rightOut + " )") ;
+                Pair<byte[], byte[]> pair02 = new Pair<byte[], byte[]>(leftOut.getBytes("UTF-8"), rightOut.getBytes("UTF-8")) ;
+                sdb02.add(pair02) ;
+                monitor01.tick() ;
+            }
+            sdb01.close() ;
+            sdb01 = null ;
+            tdbloader2.print ( monitor01 ) ;
+            
+            log.info("Node Table (2/3): generating input data using node ids...") ;
+            final ProgressLogger monitor02 = new ProgressLogger(log, "records for node table (2/3) phase", BulkLoader.DataTickPoint,BulkLoader.superTick) ;
+            monitor02.start() ;
+            Iterator<Pair<byte[], byte[]>> iter02 = sdb02.iterator() ;
+            curr = null ;
+            Long s = null ;
+            Long p = null ;
+            Long o = null ;
+            Long g = null ;
+            while ( iter02.hasNext() ) {
+                Pair<byte[], byte[]> pair02 = iter02.next() ;
+                String leftIn = new String(pair02.getLeft(), "UTF-8") ;
+                String rightIn = new String(pair02.getRight(), "UTF-8") ;
+//                System.out.println ("< ( " + leftIn + ", " + rightIn + " )") ;
+                if ( curr == null ) curr = leftIn ;
+                if ( ! leftIn.equals(curr) ) {
+                    curr = leftIn ;
+                    write (g, s, p, o) ;
+                    s = null ;
+                    p = null ;
+                    o = null ;
+                    g = null ;
+                    monitor02.tick() ;
+                }
+                String tokens[] = rightIn.split("\\|") ;
+                if ( "s".equals(tokens[1]) ) s = Long.parseLong(tokens[0]) ;
+                else if ( "p".equals(tokens[1]) ) p = Long.parseLong(tokens[0]) ;
+                else if ( "o".equals(tokens[1]) ) o = Long.parseLong(tokens[0]) ;
+                else if ( "g".equals(tokens[1]) ) g = Long.parseLong(tokens[0]) ;
+                
+            }
+            // Write the last group only if there was one: with no input the ids are
+            // still null and write() would fail on them.
+            if ( curr != null ) {
+                write (g, s, p, o) ; // ensure we write the last triple|quad
+                monitor02.tick() ;
+            }
+            sdb02.close() ;
+            sdb02 = null ;
+            tdbloader2.print ( monitor02 ) ;
+        } catch (UnsupportedEncodingException e) {
+            throw new AtlasException(e) ;
+        }
+        
+        
+        // Node table B+Tree index (i.e. node2id.dat/idn)
+        log.info("Node Table (3/3): building node table B+Tree index (i.e. node2id.dat and node2id.idn files)...") ;
+        final ProgressLogger monitor03 = new ProgressLogger(log, "records for node table (3/3) phase", BulkLoader.DataTickPoint,BulkLoader.superTick) ;
+        monitor03.start() ;
+        String path = dsg.getLocation().getDirectoryPath() ;
+        new File(path, "node2id.dat").delete() ;
+        new File(path, "node2id.idn").delete() ;
+        
+        final RecordFactory recordFactory = new RecordFactory(LenNodeHash, SizeOfNodeId) ;
+        Transform<Pair<byte[], byte[]>, Record> transformPair2Record = new Transform<Pair<byte[], byte[]>, Record>() {
+            @Override public Record convert(Pair<byte[], byte[]> pair) {
+                monitor03.tick() ;
+                return recordFactory.create(pair.getLeft(), pair.getRight()) ;
+            }
+        };
+
+        int order = BPlusTreeParams.calcOrder(SystemTDB.BlockSize, recordFactory) ;
+        BPlusTreeParams bptParams = new BPlusTreeParams(order, recordFactory) ;
+        int readCacheSize = 10 ;
+        int writeCacheSize = 100 ;
+        FileSet destination = new FileSet(dsg.getLocation(), Names.indexNode2Id) ;
+        BlockMgr blkMgrNodes = BlockMgrFactory.create(destination, Names.bptExt1, SystemTDB.BlockSize, readCacheSize, writeCacheSize) ;
+        BlockMgr blkMgrRecords = BlockMgrFactory.create(destination, Names.bptExt2, SystemTDB.BlockSize, readCacheSize, writeCacheSize) ;
+        Iterator<Record> iter2 = Iter.iter(sdb03.iterator()).map(transformPair2Record) ;
+        BPlusTree bpt2 = BPlusTreeRewriter.packIntoBPlusTree(iter2, bptParams, recordFactory, blkMgrNodes, blkMgrRecords) ;
+        bpt2.sync() ;
+        sdb03.close() ;
+        sdb03 = null ;
+        tdbloader2.print ( monitor03 ) ;
+
+        outputTriples.flush() ;
+        outputQuads.flush() ;
+        objects.sync() ;
+    }
+    
+    /** Returns the statistics collector fed while tuples are written during {@link #close()}. */
+    public StatsCollectorNodeId getCollector() { return stats ; }
+    
+    /**
+     * Emits one id tuple: a quad when {@code g} is set, a triple otherwise. Statistics are
+     * recorded unless {@code tdbloader2.no_stats} is set, as NodeTableBuilder does.
+     */
+    private void write (Long g, Long s, Long p, Long o) {
+//        System.out.println ("> ( " + g + ", " + s + ", " + p + ", " + o + " )") ;
+        if ( g != null ) {
+            outputQuads.add(Tuple.create(g, s, p, o)) ;
+            if ( !tdbloader2.no_stats ) stats.record(new NodeId(g), new NodeId(s), new NodeId(p), new NodeId(o)) ;
+        } else {
+            outputTriples.add(Tuple.create(s, p, o)) ;
+            if ( !tdbloader2.no_stats ) stats.record(null, new NodeId(s), new NodeId(p), new NodeId(o)) ;
+        }
+    }
+}
+
+

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/NodeTableBuilder2.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairComparator.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairComparator.java?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairComparator.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairComparator.java Fri Sep 23 15:34:49 2011
@@ -0,0 +1,18 @@
+package org.apache.jena.tdbloader2;
+
+import java.util.Comparator;
+
+import org.openjena.atlas.lib.Bytes;
+import org.openjena.atlas.lib.Pair;
+
+/**
+ * Orders pairs by their left byte array only; the right value takes no part in the comparison.
+ */
+public class PairComparator implements Comparator<Pair<byte[], byte[]>> {
+    @Override
+    public int compare(Pair<byte[], byte[]> p1, Pair<byte[], byte[]> p2) {
+        byte[] leftOfFirst = p1.getLeft() ;
+        byte[] leftOfSecond = p2.getLeft() ;
+        return Bytes.compare(leftOfFirst, leftOfSecond) ;
+    }
+}
\ No newline at end of file

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairComparator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairInputStream.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairInputStream.java?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairInputStream.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairInputStream.java Fri Sep 23 15:34:49 2011
@@ -0,0 +1,102 @@
+package org.apache.jena.tdbloader2;
+
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import org.openjena.atlas.AtlasException;
+import org.openjena.atlas.lib.Closeable;
+import org.openjena.atlas.lib.Pair;
+
+import cmd.tdbloader2;
+
+/**
+ * Iterator over length-prefixed pairs of byte arrays: each record is an int length
+ * followed by the left bytes, then an int length followed by the right bytes.
+ * One record is always read ahead into {@code slot}.
+ */
+public class PairInputStream implements Iterator<Pair<byte[], byte[]>>, Closeable {
+
+    private final DataInputStream in ;
+    private Pair<byte[], byte[]> slot = null ;
+
+    /**
+     * Wraps {@code in} via {@code tdbloader2.createDataInputStream} and reads the first record.
+     *
+     * @throws AtlasException if the first record cannot be read
+     */
+    public PairInputStream(InputStream in) {
+        this.in = tdbloader2.createDataInputStream(in) ;
+        slot = readNext() ;
+    }
+
+    @Override
+    public boolean hasNext() {
+        return slot != null ;
+    }
+
+    /**
+     * Returns the record read ahead and fetches the following one.
+     *
+     * @throws NoSuchElementException if there is no record left
+     * @throws AtlasException if the following record cannot be read
+     */
+    @Override
+    public Pair<byte[], byte[]> next() {
+        if ( slot == null )
+            throw new NoSuchElementException() ;
+        Pair<byte[], byte[]> result = slot ;
+        slot = readNext() ;
+        return result ;
+    }
+
+    /**
+     * Reads one record.
+     *
+     * @return the record, or {@code null} when the input ends where the next record would start
+     * @throws AtlasException on any other I/O failure, including a record that is cut short
+     */
+    private Pair<byte[], byte[]> readNext() {
+        try {
+            int leftLength ;
+            try {
+                leftLength = in.readInt() ;
+            } catch (EOFException e) {
+                // EOF while reading the length of the next record is the normal end of data.
+                return null ;
+            }
+            byte left[] = new byte[leftLength] ;
+            in.readFully(left) ;
+            byte right[] = new byte[in.readInt()] ;
+            in.readFully(right) ;
+            return new Pair<byte[], byte[]> (left, right) ;
+        } catch (IOException e) {
+            // Report the failure: treating it as end of data would silently drop records.
+            throw new AtlasException(e) ;
+        }
+    }
+
+    @Override
+    public void remove() {
+        throw new AtlasException("Method not implemented.") ;
+    }
+
+    /**
+     * Closes the underlying stream.
+     *
+     * @throws AtlasException if closing fails
+     */
+    @Override
+    public void close() {
+        try {
+            in.close() ;
+        } catch (IOException e) {
+            throw new AtlasException(e) ;
+        }
+    }
+
+}
+

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairInputStream.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairOutputStream.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairOutputStream.java?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairOutputStream.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairOutputStream.java Fri Sep 23 15:34:49 2011
@@ -0,0 +1,63 @@
+package org.apache.jena.tdbloader2;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.openjena.atlas.AtlasException;
+import org.openjena.atlas.lib.Pair;
+import org.openjena.atlas.lib.Sink;
+
+import cmd.tdbloader2;
+
+/**
+ * Sink writing pairs of byte arrays as length-prefixed records: an int length and the
+ * left bytes, then an int length and the right bytes. I/O failures are rethrown.
+ */
+public class PairOutputStream implements Sink<Pair<byte[], byte[]>> {
+
+    private final DataOutputStream out ;
+
+    /** Wraps {@code out} via {@code tdbloader2.createDataOutputStream}. */
+    public PairOutputStream(OutputStream out) {
+        this.out = tdbloader2.createDataOutputStream(out) ;
+    }
+
+    /**
+     * Writes one record.
+     *
+     * @throws AtlasException if the record cannot be written
+     */
+    @Override
+    public void send(Pair<byte[], byte[]> pair) {
+        try {
+            out.writeInt(pair.getLeft().length) ;
+            out.write(pair.getLeft()) ;
+            out.writeInt(pair.getRight().length) ;
+            out.write(pair.getRight()) ;
+        } catch (IOException e) {
+            throw new AtlasException(e) ;
+        }
+    }
+
+    /** @throws AtlasException if flushing the underlying stream fails */
+    @Override
+    public void flush() {
+        try {
+            out.flush() ;
+        } catch (IOException e) {
+            throw new AtlasException(e) ;
+        }
+    }
+
+    /** @throws AtlasException if closing the underlying stream fails */
+    @Override
+    public void close() {
+        try {
+            out.close() ;
+        } catch (IOException e) {
+            throw new AtlasException(e) ;
+        }
+    }
+
+}

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairOutputStream.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairSerializationFactory.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairSerializationFactory.java?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairSerializationFactory.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairSerializationFactory.java Fri Sep 23 15:34:49 2011
@@ -0,0 +1,34 @@
+package org.apache.jena.tdbloader2;
+
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Iterator;
+
+import org.openjena.atlas.AtlasException;
+import org.openjena.atlas.data.SerializationFactory;
+import org.openjena.atlas.lib.Pair;
+import org.openjena.atlas.lib.Sink;
+
+/**
+ * Serialization factory for pairs of byte arrays, backed by PairInputStream and PairOutputStream.
+ */
+public class PairSerializationFactory implements SerializationFactory<Pair<byte[], byte[]>> {
+
+    /** Creates a {@link PairInputStream} over {@code in}. */
+    @Override
+    public Iterator<Pair<byte[], byte[]>> createDeserializer(InputStream in) {
+        return new PairInputStream(in) ;
+    }
+
+    /** Creates a {@link PairOutputStream} over {@code out}. */
+    @Override
+    public Sink<Pair<byte[], byte[]>> createSerializer(OutputStream out) {
+        return new PairOutputStream(out) ;
+    }
+
+    /** Not supported: always throws an {@link AtlasException}. */
+    @Override
+    public long getEstimatedMemorySize(Pair<byte[], byte[]> item) {
+        throw new AtlasException("Method not implemented.") ;
+    }
+}

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/PairSerializationFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/ProgressLogger.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/ProgressLogger.java?rev=1174838&r1=1174837&r2=1174838&view=diff
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/ProgressLogger.java (original)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/ProgressLogger.java Fri Sep 23 15:34:49 2011
@@ -76,6 +76,14 @@ public class ProgressLogger
         }
     }
     
+    /**
+     * Gives access to the logger this instance holds in its {@code log} field.
+     */
+    public Logger getLogger()
+    {
+        return log ;
+    }
+    
     private void elapsed(long timerReading)
     {
         float elapsedSecs = timerReading/1000F ;

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/QuadSerializationFactory.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/QuadSerializationFactory.java?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/QuadSerializationFactory.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/QuadSerializationFactory.java Fri Sep 23 15:34:49 2011
@@ -0,0 +1,34 @@
+package org.apache.jena.tdbloader2;
+
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Iterator;
+
+import org.openjena.atlas.AtlasException;
+import org.openjena.atlas.data.SerializationFactory;
+import org.openjena.atlas.lib.Sink;
+import org.openjena.atlas.lib.Tuple;
+
+/**
+ * Serialization factory for {@code Tuple<Long>} values read back with a TupleInputStream of size 4.
+ */
+public class QuadSerializationFactory implements SerializationFactory<Tuple<Long>> {
+
+    /** Creates a {@link TupleInputStream} over {@code in}, constructed with size 4. */
+    @Override
+    public Iterator<Tuple<Long>> createDeserializer(InputStream in) {
+        return new TupleInputStream(in, 4) ;
+    }
+
+    /** Creates a {@link TupleOutputStream} over {@code out}. */
+    @Override
+    public Sink<Tuple<Long>> createSerializer(OutputStream out) {
+        return new TupleOutputStream(out) ;
+    }
+
+    /** Not supported: always throws an {@link AtlasException}. */
+    @Override
+    public long getEstimatedMemorySize(Tuple<Long> item) {
+        throw new AtlasException("Method not implemented.") ;
+    }
+}
\ No newline at end of file

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/QuadSerializationFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TripleSerializationFactory.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TripleSerializationFactory.java?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TripleSerializationFactory.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TripleSerializationFactory.java Fri Sep 23 15:34:49 2011
@@ -0,0 +1,34 @@
+package org.apache.jena.tdbloader2;
+
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Iterator;
+
+import org.openjena.atlas.AtlasException;
+import org.openjena.atlas.data.SerializationFactory;
+import org.openjena.atlas.lib.Sink;
+import org.openjena.atlas.lib.Tuple;
+
+/**
+ * Serialization factory for {@code Tuple<Long>} values read back with a TupleInputStream of size 3.
+ */
+public class TripleSerializationFactory implements SerializationFactory<Tuple<Long>> {
+
+    /** Creates a {@link TupleInputStream} over {@code in}, constructed with size 3. */
+    @Override
+    public Iterator<Tuple<Long>> createDeserializer(InputStream in) {
+        return new TupleInputStream(in, 3) ;
+    }
+
+    /** Creates a {@link TupleOutputStream} over {@code out}. */
+    @Override
+    public Sink<Tuple<Long>> createSerializer(OutputStream out) {
+        return new TupleOutputStream(out) ;
+    }
+
+    /** Not supported: always throws an {@link AtlasException}. */
+    @Override
+    public long getEstimatedMemorySize(Tuple<Long> item) {
+        throw new AtlasException("Method not implemented.") ;
+    }
+}
\ No newline at end of file

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TripleSerializationFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleComparator.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleComparator.java?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleComparator.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleComparator.java Fri Sep 23 15:34:49 2011
@@ -0,0 +1,21 @@
+package org.apache.jena.tdbloader2;
+
+import java.util.Comparator;
+
+import org.openjena.atlas.AtlasException;
+import org.openjena.atlas.lib.Tuple;
+
+/**
+ * Lexicographic ordering for tuples of {@code Long}: slots are compared left to right
+ * and the first slot that differs decides the result.
+ */
+public class TupleComparator implements Comparator<Tuple<Long>> {
+
+    /**
+     * Compares two tuples slot by slot.
+     *
+     * @return the first non-zero slot comparison, or 0 when every slot compares equal
+     * @throws AtlasException if the two tuples do not have the same size
+     */
+    @Override
+    public int compare(Tuple<Long> t1, Tuple<Long> t2) {
+        final int width = t1.size() ;
+        if ( t2.size() != width ) {
+            throw new AtlasException("Cannot compare tuple of different sizes.") ;
+        }
+        int order = 0 ;
+        // Stop as soon as one slot differs; later slots are irrelevant.
+        for ( int idx = 0 ; order == 0 && idx < width ; idx++ ) {
+            order = t1.get(idx).compareTo(t2.get(idx)) ;
+        }
+        return order ;
+    }
+}
\ No newline at end of file

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleComparator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleInputStream.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleInputStream.java?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleInputStream.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleInputStream.java Fri Sep 23 15:34:49 2011
@@ -0,0 +1,66 @@
+package org.apache.jena.tdbloader2;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+
+import org.openjena.atlas.AtlasException;
+import org.openjena.atlas.lib.Closeable;
+import org.openjena.atlas.lib.Tuple;
+
+import cmd.tdbloader2;
+
+public class TupleInputStream implements Iterator<Tuple<Long>>, Closeable {
+
+    private DataInputStream in ;
+    private int size ;
+    private Tuple<Long> slot = null ;
+    
+    public TupleInputStream(InputStream in, int size) {
+        this.in = tdbloader2.createDataInputStream(in) ;
+        this.size = size ;
+        slot = readNext() ;
+    }
+
+    @Override
+    public boolean hasNext() {
+        return slot != null ;
+    }
+
+    @Override
+    public Tuple<Long> next() {
+        Tuple<Long> result = slot ;
+        slot = readNext() ;
+        return result ;
+    }
+    
+    private Tuple<Long> readNext() {
+        try {
+            if ( size == 3 ) {
+                return Tuple.create(in.readLong(), in.readLong(), in.readLong()) ;
+            } else if ( size == 4 ) {
+                return Tuple.create(in.readLong(), in.readLong(), in.readLong(), in.readLong()) ;
+            } else {
+                throw new AtlasException("Unsupported size.") ;
+            }
+        } catch (IOException e) {
+            return null ;
+        }
+    }
+
+    @Override
+    public void remove() {
+        throw new AtlasException("Method not implemented.") ;
+    }
+
+    @Override
+    public void close() {
+        try {
+            in.close() ;
+        } catch (IOException e) {
+            new AtlasException(e) ;
+        }        
+    }
+    
+}

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleInputStream.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleOutputStream.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleOutputStream.java?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleOutputStream.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleOutputStream.java Fri Sep 23 15:34:49 2011
@@ -0,0 +1,52 @@
+package org.apache.jena.tdbloader2;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Iterator;
+
+import org.openjena.atlas.AtlasException;
+import org.openjena.atlas.lib.Sink;
+import org.openjena.atlas.lib.Tuple;
+
+import cmd.tdbloader2;
+
+/**
+ * {@link Sink} that writes each tuple as a sequence of {@code long} values to a binary
+ * stream obtained from {@code tdbloader2.createDataOutputStream}.
+ * <p>
+ * I/O failures are rethrown as {@link AtlasException}.
+ */
+public class TupleOutputStream implements Sink<Tuple<Long>> {
+
+    private final DataOutputStream out ;
+
+    /**
+     * @param out stream the tuples are written to
+     */
+    public TupleOutputStream(OutputStream out) {
+        this.out = tdbloader2.createDataOutputStream(out) ;
+    }
+
+    /**
+     * Writes every element of {@code tuple}, in iteration order, with {@code writeLong}.
+     *
+     * @throws AtlasException if a write fails; the remaining elements are not written
+     */
+    @Override
+    public void send(Tuple<Long> tuple) {
+        try {
+            Iterator<Long> iter = tuple.iterator() ;
+            while ( iter.hasNext() ) {
+                out.writeLong( iter.next() ) ;
+            }
+        } catch (IOException e) {
+            // Abort at the first failure rather than continuing with a half-written tuple.
+            throw new AtlasException(e) ;
+        }
+    }
+
+    /**
+     * Flushes the underlying stream.
+     *
+     * @throws AtlasException if flushing fails
+     */
+    @Override
+    public void flush() {
+        try {
+            out.flush() ;
+        } catch (IOException e) {
+            throw new AtlasException(e) ;
+        }
+    }
+
+    /**
+     * Closes the underlying stream.
+     *
+     * @throws AtlasException if closing fails
+     */
+    @Override
+    public void close() {
+        try {
+            out.close() ;
+        } catch (IOException e) {
+            throw new AtlasException(e) ;
+        }
+    }
+
+}

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/TupleOutputStream.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/java/cmd/TestTDBLoader2.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/java/cmd/TestTDBLoader2.java?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/java/cmd/TestTDBLoader2.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/java/cmd/TestTDBLoader2.java Fri Sep 23 15:34:49 2011
@@ -0,0 +1,155 @@
+package cmd;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+import org.openjena.atlas.lib.FileOps;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.hp.hpl.jena.graph.Graph;
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.tdb.TDBFactory;
+import com.hp.hpl.jena.tdb.TDBLoader;
+import com.hp.hpl.jena.tdb.base.file.Location;
+import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
+import com.hp.hpl.jena.util.iterator.ExtendedIterator;
+
+@RunWith(Parameterized.class)
+public class TestTDBLoader2 {
+
+    private static final Logger log = LoggerFactory.getLogger(TestTDBLoader2.class);
+    
+    @Parameters
+    public static Collection<Object[]> data() {
+        return Arrays.asList(new Object[][] {
+                { "src/test/resources/test-01", "target/output-test-01"}, 
+                { "src/test/resources/test-02", "target/output-test-02" }, 
+                { "src/test/resources/test-03", "target/output-test-03" },
+                { "src/test/resources/test-04", "target/output-test-04" },
+        });
+    }
+
+    private String input ;
+    private String output ;
+    
+    public TestTDBLoader2 ( String input, String output ) {
+        this.input = input ;
+        this.output = output ;
+    }
+
+    @Before public void setup() {
+        if ( FileOps.exists(output) ) {
+            FileOps.clearDirectory(output) ;            
+        } else {
+            FileOps.ensureDir(output);          
+        }
+    }
+    
+    @Test public void test() throws Exception { 
+        run (input, output); 
+    }
+    
+    private void run ( String input, String output ) throws Exception {
+        List<String> urls = new ArrayList<String>();
+        for (File file : new File(input).listFiles()) {
+            if (file.isFile()) {
+                urls.add(file.getAbsolutePath());
+            }
+        }
+        
+        DatasetGraphTDB dsgMem = TDBFactory.createDatasetGraph();
+        TDBLoader.load(dsgMem, urls);
+        
+        String[] args;
+        File path = new File(input);
+        if ( path.isDirectory() ) {
+            ArrayList<String> arguments = new ArrayList<String>();
+            arguments.add("--loc");
+            arguments.add(output);
+            ArrayList<String> datafiles = new ArrayList<String>();
+            File files[] = path.listFiles();
+            for (File file : files) {
+                datafiles.add(file.getAbsolutePath());
+            }
+            arguments.addAll(datafiles);
+            args = arguments.toArray(new String[]{}) ;
+        } else {
+            args = new String[] { "--loc", output, input };
+        }
+
+        tdbloader2.main(args);
+
+        Location location = new Location(output);
+        DatasetGraphTDB dsgDisk = TDBFactory.createDatasetGraph(location);
+        TDBLoader.load(dsgDisk, urls);
+        
+        assertTrue ( dump(dsgMem, dsgDisk), isomorphic ( dsgMem, dsgDisk ) );
+    }
+    
+    private boolean isomorphic(DatasetGraphTDB dsgMem, DatasetGraphTDB dsgDisk) {
+        if (!dsgMem.getDefaultGraphTDB().isIsomorphicWith(dsgDisk.getDefaultGraphTDB()))
+            return false;
+        Iterator<Node> graphsMem = dsgMem.listGraphNodes();
+        Iterator<Node> graphsDisk = dsgDisk.listGraphNodes();
+        
+        Set<Node> seen = new HashSet<Node>();
+
+        while (graphsMem.hasNext()) {
+            Node graphNode = graphsMem.next();
+            if (dsgDisk.getGraphTDB(graphNode) == null) return false;
+            if (!dsgMem.getGraphTDB(graphNode).isIsomorphicWith(dsgDisk.getGraphTDB(graphNode))) return false;
+            seen.add(graphNode);
+        }
+
+        while (graphsDisk.hasNext()) {
+            Node graphNode = graphsDisk.next();
+            if (!seen.contains(graphNode)) {
+                if (dsgMem.getGraphTDB(graphNode) == null) return false;
+                if (!dsgMem.getGraphTDB(graphNode).isIsomorphicWith(dsgDisk.getGraphTDB(graphNode))) return false;
+            }
+        }
+
+        return true;
+    }
+
+    private String dump(DatasetGraphTDB dsgMem, DatasetGraphTDB dsgDisk) {
+        StringBuffer sb = new StringBuffer();
+        sb.append("\n");
+        
+        if (!dsgMem.getDefaultGraphTDB().isIsomorphicWith(dsgDisk.getDefaultGraphTDB())) {
+            sb.append("Default graphs are not isomorphic [FAIL]\n");
+            sb.append("    First:\n");
+            dump(sb, dsgMem.getDefaultGraphTDB());
+            sb.append("    Second:\n");
+            dump(sb, dsgDisk.getDefaultGraphTDB());
+        } else {
+            sb.append("Default graphs are isomorphic [OK]\n");
+        }
+            
+        return sb.toString();
+    }
+    
+    private static void dump (StringBuffer sb, Graph graph) {
+        ExtendedIterator<Triple> iter = graph.find(Node.ANY, Node.ANY, Node.ANY);
+        while ( iter.hasNext() ) {
+            Triple triple = iter.next();
+            sb.append(triple).append("\n");
+        }
+    }
+    
+}

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/java/cmd/TestTDBLoader2.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-01/data.nq
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-01/data.nq?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-01/data.nq (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-01/data.nq Fri Sep 23 15:34:49 2011
@@ -0,0 +1,10 @@
+<http://example.org/alice/foaf.rdf#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> <http://example.org/alice/foaf.rdf> .
+<http://example.org/alice/foaf.rdf#me> <http://xmlns.com/foaf/0.1/name>                  "Alice"                            <http://example.org/alice/foaf.rdf> .
+<http://example.org/alice/foaf.rdf#me> <http://xmlns.com/foaf/0.1/knows>                 _:bnode1                           <http://example.org/alice/foaf.rdf> .
+_:bnode1                               <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> <http://example.org/alice/foaf.rdf> .
+_:bnode1                               <http://xmlns.com/foaf/0.1/name>                  "Bob"                              <http://example.org/alice/foaf.rdf> .
+_:bnode1                               <http://xmlns.com/foaf/0.1/homepage>              <http://example.org/bob/>          <http://example.org/alice/foaf.rdf> .
+_:bnode1                               <http://www.w3.org/2000/01/rdf-schema#seeAlso>   <http://example.org/bob/foaf.rdf>  <http://example.org/alice/foaf.rdf> .
+<http://example.org/bob/foaf.rdf#me>   <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> <http://example.org/bob/foaf.rdf> .
+<http://example.org/bob/foaf.rdf#me>   <http://xmlns.com/foaf/0.1/name>                  "Bob"                              <http://example.org/bob/foaf.rdf> .
+<http://example.org/bob/foaf.rdf#me>   <http://xmlns.com/foaf/0.1/homepage>              <http://example.org/bob/>          <http://example.org/bob/foaf.rdf> .
\ No newline at end of file

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-01/data.nt
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-01/data.nt?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-01/data.nt (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-01/data.nt Fri Sep 23 15:34:49 2011
@@ -0,0 +1 @@
+<http://example.org/alice/foaf.rdf#me> <http://xmlns.com/foaf/0.1/mbox>                  <mailto:alice@example.org> . 

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-02/data.nq
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-02/data.nq?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-02/data.nq (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-02/data.nq Fri Sep 23 15:34:49 2011
@@ -0,0 +1 @@
+<x> <y> <z> . 

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-03/data.nt
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-03/data.nt?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-03/data.nt (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-03/data.nt Fri Sep 23 15:34:49 2011
@@ -0,0 +1 @@
+<foo:x> <foo:y> <foo:z> . 

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-04/data-01.nt
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-04/data-01.nt?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-04/data-01.nt (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-04/data-01.nt Fri Sep 23 15:34:49 2011
@@ -0,0 +1,2 @@
+<foo:x> <foo:y> _:bnode1 .
+_:bnode1 <foo:z> "1" .

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-04/data-02.nt
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-04/data-02.nt?rev=1174838&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-04/data-02.nt (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/test-04/data-02.nt Fri Sep 23 15:34:49 2011
@@ -0,0 +1,2 @@
+<foo:x> <foo:y> _:bnode1 .
+_:bnode1 <foo:z> "2" .