You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by ca...@apache.org on 2011/09/23 09:09:07 UTC

svn commit: r1174559 - in /incubator/jena/Scratch/PC/tdbloader2/trunk: ./ .settings/ src/ src/main/ src/main/java/ src/main/java/cmd/ src/main/java/org/ src/main/java/org/apache/ src/main/java/org/apache/jena/ src/main/java/org/apache/jena/tdbloader2/ ...

Author: castagna
Date: Fri Sep 23 07:09:06 2011
New Revision: 1174559

URL: http://svn.apache.org/viewvc?rev=1174559&view=rev
Log:
JENA-117

Committing this to the Scratch area to make it easier for others to try it or look at it, and for me to work on.
It's almost done, but we could use the memory policy threshold instead, spilling after we have finished adding items to our DataBag(s) so that we free up memory.
We also need to think of a better way to merge-sort a large number of files (probably with multiple steps).

Added:
    incubator/jena/Scratch/PC/tdbloader2/trunk/.classpath   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/.project   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/.settings/
    incubator/jena/Scratch/PC/tdbloader2/trunk/.settings/org.eclipse.jdt.core.prefs   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/README
    incubator/jena/Scratch/PC/tdbloader2/trunk/pom.xml   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/cmd/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/cmd/tdbloader2.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/ProgressLogger.java   (with props)
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/resources/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/java/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/
    incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/log4j.properties   (with props)
Modified:
    incubator/jena/Scratch/PC/tdbloader2/trunk/   (props changed)

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Fri Sep 23 07:09:06 2011
@@ -0,0 +1 @@
+target

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/.classpath
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/.classpath?rev=1174559&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/.classpath (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/.classpath Fri Sep 23 07:09:06 2011
@@ -0,0 +1,29 @@
+<classpath>
+  <classpathentry kind="src" path="src/test/java" including="**/*.java"/>
+  <classpathentry kind="src" path="src/test/resources" excluding="**/*.java"/>
+  <classpathentry kind="src" path="src/main/java" including="**/*.java"/>
+  <classpathentry kind="src" path="src/main/resources" excluding="**/*.java"/>
+  <classpathentry kind="src" path="target/maven-shared-archive-resources" excluding="**/*.java"/>
+  <classpathentry kind="output" path="target/classes-eclipse"/>
+  <classpathentry kind="var" path="M2_REPO/com/hp/hpl/jena/arq/2.8.9-SNAPSHOT/arq-2.8.9-SNAPSHOT.jar" sourcepath="M2_REPO/com/hp/hpl/jena/arq/2.8.9-SNAPSHOT/arq-2.8.9-SNAPSHOT-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/commons-codec/commons-codec/1.4/commons-codec-1.4.jar" sourcepath="M2_REPO/commons-codec/commons-codec/1.4/commons-codec-1.4-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpclient/4.1.2/httpclient-4.1.2.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpclient/4.1.2/httpclient-4.1.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpcore/4.1.2/httpcore-4.1.2.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpcore/4.1.2/httpcore-4.1.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/ibm/icu/icu4j/3.4.4/icu4j-3.4.4.jar" sourcepath="M2_REPO/com/ibm/icu/icu4j/3.4.4/icu4j-3.4.4-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/hp/hpl/jena/iri/0.8/iri-0.8.jar" sourcepath="M2_REPO/com/hp/hpl/jena/iri/0.8/iri-0.8-sources.jar">
+    <attributes>
+      <attribute value="jar:file:/home/castagna/.m2/repository/com/hp/hpl/jena/iri/0.8/iri-0.8-javadoc.jar!/" name="javadoc_location"/>
+    </attributes>
+  </classpathentry>
+  <classpathentry kind="var" path="M2_REPO/com/hp/hpl/jena/jena/2.6.4/jena-2.6.4.jar" sourcepath="M2_REPO/com/hp/hpl/jena/jena/2.6.4/jena-2.6.4-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/junit/junit/4.8.2/junit-4.8.2.jar" sourcepath="M2_REPO/junit/junit/4.8.2/junit-4.8.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/log4j/log4j/1.2.16/log4j-1.2.16.jar" sourcepath="M2_REPO/log4j/log4j/1.2.16/log4j-1.2.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/lucene/lucene-core/2.3.1/lucene-core-2.3.1.jar" sourcepath="M2_REPO/org/apache/lucene/lucene-core/2.3.1/lucene-core-2.3.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/slf4j-api/1.6.1/slf4j-api-1.6.1.jar" sourcepath="M2_REPO/org/slf4j/slf4j-api/1.6.1/slf4j-api-1.6.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/slf4j-log4j12/1.5.8/slf4j-log4j12-1.5.8.jar" sourcepath="M2_REPO/org/slf4j/slf4j-log4j12/1.5.8/slf4j-log4j12-1.5.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/stax/stax-api/1.0.1/stax-api-1.0.1.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/hp/hpl/jena/tdb/0.8.11-SNAPSHOT/tdb-0.8.11-SNAPSHOT.jar" sourcepath="M2_REPO/com/hp/hpl/jena/tdb/0.8.11-SNAPSHOT/tdb-0.8.11-SNAPSHOT-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/codehaus/woodstox/wstx-asl/3.2.9/wstx-asl-3.2.9.jar" sourcepath="M2_REPO/org/codehaus/woodstox/wstx-asl/3.2.9/wstx-asl-3.2.9-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/xerces/xercesImpl/2.7.1/xercesImpl-2.7.1.jar"/>
+  <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
+</classpath>
\ No newline at end of file

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/.classpath
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/.project
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/.project?rev=1174559&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/.project (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/.project Fri Sep 23 07:09:06 2011
@@ -0,0 +1,13 @@
+<projectDescription>
+  <name>tdbloader2</name>
+  <comment>tdbloader2 - this is an experimental (pure Java) implementation. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse.</comment>
+  <projects/>
+  <buildSpec>
+    <buildCommand>
+      <name>org.eclipse.jdt.core.javabuilder</name>
+    </buildCommand>
+  </buildSpec>
+  <natures>
+    <nature>org.eclipse.jdt.core.javanature</nature>
+  </natures>
+</projectDescription>
\ No newline at end of file

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/.project
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/.settings/org.eclipse.jdt.core.prefs
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/.settings/org.eclipse.jdt.core.prefs?rev=1174559&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/.settings/org.eclipse.jdt.core.prefs (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/.settings/org.eclipse.jdt.core.prefs Fri Sep 23 07:09:06 2011
@@ -0,0 +1,9 @@
+#Fri Sep 23 08:01:23 BST 2011
+encoding//src/test/java=UTF-8
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
+eclipse.preferences.version=1
+encoding//src/test/resources=UTF-8
+org.eclipse.jdt.core.compiler.source=1.6
+encoding//src/main/java=UTF-8
+encoding//src/main/resources=UTF-8
+org.eclipse.jdt.core.compiler.compliance=1.6

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/.settings/org.eclipse.jdt.core.prefs
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/README
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/README?rev=1174559&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/README (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/README Fri Sep 23 07:09:06 2011
@@ -0,0 +1,5 @@
+tdbloader2
+----------
+
+This is an experimental (pure Java) version of tdbloader2.
+See also: https://issues.apache.org/jira/browse/JENA-117
\ No newline at end of file

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/pom.xml
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/pom.xml?rev=1174559&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/pom.xml (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/pom.xml Fri Sep 23 07:09:06 2011
@@ -0,0 +1,294 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache</groupId>
+    <artifactId>apache</artifactId>
+    <version>9</version>
+  </parent> 
+
+  <groupId>org.apache.jena</groupId>
+  <artifactId>tdbloader2</artifactId>
+  <packaging>jar</packaging>
+  <name>tdbloader2</name>
+  <version>0.1-incubating-SNAPSHOT</version>
+
+  <description>tdbloader2 - this is an experimental (pure Java) implementation</description>
+  <url>https://svn.apache.org/repos/asf/incubator/jena/Scratch/PC/tdbloader2/</url>
+
+  <licenses>
+    <license>
+      <name>The Apache Software License, Version 2.0</name>
+      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+      <distribution>repo</distribution>
+    </license>
+  </licenses>
+
+  <organization>
+    <name>The Apache Software Foundation</name>
+    <url>http://www.apache.org/</url>
+  </organization>
+
+  <scm>
+    <connection>scm:svn:http://svn.apache.org/repos/asf/incubator/jena/Scratch/PC/tdbloader2/trunk</connection>
+    <developerConnection>scm:svn:https://svn.apache.org/repos/asf/incubator/jena/Scratch/PC/tdbloader2/trunk</developerConnection>
+    <url>https://svn.apache.org/repos/asf/incubator/jena/Scratch/PC/tdbloader2/</url>
+  </scm>
+
+  <properties>
+    <tdb.version>0.8.11-SNAPSHOT</tdb.version>
+    <slf4j.version>1.6.1</slf4j.version>
+    <junit.version>4.8.2</junit.version>
+    <jdk.version>1.6</jdk.version>
+    <targetJdk>${jdk.version}</targetJdk> <!-- MPMD-86 workaround -->
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <dependencies>
+
+    <dependency>
+      <groupId>com.hp.hpl.jena</groupId>
+      <artifactId>tdb</artifactId>
+      <version>${tdb.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>${junit.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+  </dependencies>
+
+  <build>
+
+    <plugins>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <configuration>
+          <source>${jdk.version}</source>
+          <target>${jdk.version}</target>
+          <encoding>${project.build.sourceEncoding}</encoding>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <includes>
+            <include>**/TS_*.java</include>
+          </includes>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-source-plugin</artifactId>
+        <version>2.1.2</version>
+        <executions>
+          <execution>
+            <id>attach-sources</id>
+            <phase>package</phase>
+            <goals>
+              <goal>jar-no-fork</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-javadoc-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>attach-javadocs</id>
+            <goals>
+              <goal>jar</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <version>true</version>
+          <show>public</show>
+          <quiet>true</quiet>
+          <encoding>${project.build.sourceEncoding}</encoding>
+          <windowtitle>${project.name} ${project.version}</windowtitle>
+          <doctitle>${project.name} ${project.version}</doctitle>
+          <!-- Restrict javadoc to this project's library package (excludes the cmd entry point).
+               Was "org.apache.jena.larq" — a copy-paste leftover from the LARQ pom. -->
+          <includePackageNames>org.apache.jena.tdbloader2</includePackageNames>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-eclipse-plugin</artifactId>
+        <version>2.8</version>
+        <configuration>
+          <buildOutputDirectory>${project.build.directory}/classes-eclipse</buildOutputDirectory>
+          <downloadSources>true</downloadSources>
+          <downloadJavadocs>false</downloadJavadocs>
+          <useProjectReferences>false</useProjectReferences>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-release-plugin</artifactId>
+        <configuration>
+          <remoteTagging>true</remoteTagging>
+          <preparationGoals>clean install</preparationGoals>
+          <autoVersionSubmodules>true</autoVersionSubmodules>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+        <configuration>
+          <excludes>
+            <exclude>**/CHANGES.txt</exclude>
+            <exclude>**/DISCLAIMER.txt</exclude>
+            <exclude>**/TODO.txt</exclude>
+            <exclude>**/.svn/**</exclude>
+            <exclude>.git/**</exclude>
+            <exclude>.gitignore</exclude>
+            <exclude>**/.project</exclude>
+            <exclude>**/target/**</exclude>
+            <exclude>src/test/resources/**</exclude>
+          </excludes>
+        </configuration>
+      </plugin>
+
+    </plugins>
+
+  </build>
+  
+  <repositories>
+    <repository>
+      <id>repo-jena-dev</id>
+      <url>http://openjena.org/repo-dev</url>
+      <snapshots>
+        <enabled>true</enabled>
+      </snapshots>
+    </repository>
+  </repositories>
+
+  <issueManagement>
+    <system>JIRA</system>
+    <url>http://issues.apache.org/jira/browse/JENA</url>
+  </issueManagement>
+
+  <reporting>
+
+    <plugins>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-report-plugin</artifactId>
+        <version>2.7.2</version>
+        <configuration>
+          <outputDirectory>${project.basedir}/target/surefire-reports-html</outputDirectory>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>cobertura-maven-plugin</artifactId>
+        <version>2.5.1</version>
+        <configuration>
+          <formats>
+            <format>html</format>
+            <format>xml</format>
+          </formats>
+          <instrumentation>
+            <ignores>
+              <ignore>org.slf4j.*</ignore>
+            </ignores>
+          </instrumentation>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <version>2.6</version>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jxr-plugin</artifactId>
+        <version>2.3</version>
+      </plugin>
+
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>findbugs-maven-plugin</artifactId>
+        <version>2.3.1</version>
+        <configuration>
+          <omitVisitors>Naming</omitVisitors>
+          <!-- Analyze this project's own package.
+               Was "org.apache.jena.larq.*" — a copy-paste leftover from the LARQ pom. -->
+          <onlyAnalyze>org.apache.jena.tdbloader2.*</onlyAnalyze>
+          <findbugsXmlOutput>true</findbugsXmlOutput>
+          <findbugsXmlWithMessages>true</findbugsXmlWithMessages>
+          <xmlOutput>true</xmlOutput>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-pmd-plugin</artifactId>
+        <version>2.4</version>
+        <configuration>
+          <linkXref>true</linkXref>
+          <sourceEncoding>${project.build.sourceEncoding}</sourceEncoding>
+          <minimumTokens>100</minimumTokens>
+          <targetJdk>${jdk.version}</targetJdk>
+          <failOnViolation>false</failOnViolation>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+        <version>0.7</version>
+        <configuration>
+          <excludes>
+            <exclude>**/CHANGES.txt</exclude>
+            <exclude>**/DISCLAIMER.txt</exclude>
+            <exclude>**/TODO.txt</exclude>
+            <exclude>**/.svn/**</exclude>
+            <exclude>.git/**</exclude>
+            <exclude>.gitignore</exclude>
+            <exclude>**/.project</exclude>
+            <exclude>**/target/**</exclude>
+            <exclude>src/test/resources/**</exclude>
+          </excludes>
+        </configuration>
+      </plugin>
+
+    </plugins>
+
+  </reporting>
+
+</project>

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/pom.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/cmd/tdbloader2.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/cmd/tdbloader2.java?rev=1174559&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/cmd/tdbloader2.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/cmd/tdbloader2.java Fri Sep 23 07:09:06 2011
@@ -0,0 +1,915 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package cmd;
+
+import static com.hp.hpl.jena.sparql.util.Utils.nowAsString;
+import static com.hp.hpl.jena.tdb.lib.NodeLib.setHash;
+import static com.hp.hpl.jena.tdb.sys.SystemTDB.LenNodeHash;
+import static com.hp.hpl.jena.tdb.sys.SystemTDB.SizeOfLong;
+import static com.hp.hpl.jena.tdb.sys.SystemTDB.SizeOfNodeId;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.jena.tdbloader2.MultiThreadedSortedDataBag;
+import org.apache.jena.tdbloader2.ProgressLogger;
+import org.openjena.atlas.AtlasException;
+import org.openjena.atlas.data.DataBag;
+import org.openjena.atlas.data.SerializationFactory;
+import org.openjena.atlas.data.ThresholdPolicyCount;
+import org.openjena.atlas.io.IO;
+import org.openjena.atlas.iterator.Iter;
+import org.openjena.atlas.iterator.Transform;
+import org.openjena.atlas.lib.Bytes;
+import org.openjena.atlas.lib.Closeable;
+import org.openjena.atlas.lib.ColumnMap;
+import org.openjena.atlas.lib.FileOps;
+import org.openjena.atlas.lib.Pair;
+import org.openjena.atlas.lib.Sink;
+import org.openjena.atlas.lib.Tuple;
+import org.openjena.atlas.logging.Log;
+import org.openjena.riot.Lang;
+import org.openjena.riot.RiotLoader;
+import org.openjena.riot.out.NodeToLabel;
+import org.openjena.riot.out.OutputLangUtils;
+import org.openjena.riot.system.ParserProfile;
+import org.openjena.riot.system.RiotLib;
+import org.openjena.riot.system.SinkExtendTriplesToQuads;
+import org.openjena.riot.tokens.Token;
+import org.openjena.riot.tokens.Tokenizer;
+import org.openjena.riot.tokens.TokenizerFactory;
+import org.slf4j.Logger;
+
+import tdb.cmdline.CmdTDB;
+import arq.cmd.CmdException;
+import arq.cmdline.ArgDecl;
+import arq.cmdline.CmdGeneral;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.sparql.core.Quad;
+import com.hp.hpl.jena.sparql.util.Utils;
+import com.hp.hpl.jena.tdb.TDB;
+import com.hp.hpl.jena.tdb.base.block.BlockMgr;
+import com.hp.hpl.jena.tdb.base.block.BlockMgrFactory;
+import com.hp.hpl.jena.tdb.base.file.FileFactory;
+import com.hp.hpl.jena.tdb.base.file.FileSet;
+import com.hp.hpl.jena.tdb.base.file.Location;
+import com.hp.hpl.jena.tdb.base.objectfile.ObjectFile;
+import com.hp.hpl.jena.tdb.base.record.Record;
+import com.hp.hpl.jena.tdb.base.record.RecordFactory;
+import com.hp.hpl.jena.tdb.index.bplustree.BPlusTree;
+import com.hp.hpl.jena.tdb.index.bplustree.BPlusTreeParams;
+import com.hp.hpl.jena.tdb.index.bplustree.BPlusTreeRewriter;
+import com.hp.hpl.jena.tdb.lib.NodeLib;
+import com.hp.hpl.jena.tdb.nodetable.NodeTable;
+import com.hp.hpl.jena.tdb.nodetable.NodeTupleTable;
+import com.hp.hpl.jena.tdb.solver.stats.Stats;
+import com.hp.hpl.jena.tdb.solver.stats.StatsCollectorNodeId;
+import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
+import com.hp.hpl.jena.tdb.store.Hash;
+import com.hp.hpl.jena.tdb.store.NodeId;
+import com.hp.hpl.jena.tdb.store.bulkloader.BulkLoader;
+import com.hp.hpl.jena.tdb.sys.Names;
+import com.hp.hpl.jena.tdb.sys.SetupTDB;
+import com.hp.hpl.jena.tdb.sys.SystemTDB;
+
+public class tdbloader2 extends CmdGeneral
+{
+    static { Log.setLog4j() ; }
+    private static Logger cmdLog =TDB.logLoader ;
+
+    private static ArgDecl argLocation = new ArgDecl(ArgDecl.HasValue, "loc", "location") ;
+    private static ArgDecl argCompression = new ArgDecl(ArgDecl.NoValue, "comp", "compression") ;
+    private static ArgDecl argBufferSize = new ArgDecl(ArgDecl.HasValue, "buf", "buffer-size") ;
+    private static ArgDecl argGzipOutside = new ArgDecl(ArgDecl.NoValue, "gzip-outside") ;
+    private static ArgDecl argSpillSize = new ArgDecl(ArgDecl.HasValue, "spill", "spill-size") ;
+    private static ArgDecl argNoStats = new ArgDecl(ArgDecl.NoValue, "no-stats") ;
+    private static ArgDecl argNoBuffer = new ArgDecl(ArgDecl.NoValue, "no-buffer") ;
+
+    private String locationString ;
+    private List<String> datafiles ;
+    private Location location ;
+    private static boolean compression ;
+    private static boolean gzip_outside = false ;
+    private static int buffer_size = 8192 ;
+    private static int spill_size = 1000000 ;
+    private static boolean no_stats = false ;
+    private static boolean no_buffer = false ;
+    
+    private ThresholdPolicyCount<Tuple<Long>> policy ;
+    private Comparator<Tuple<Long>> comparator = new TupleComparator();
+    
+    /** Command-line entry point: set up logging, silence the TDB optimizer warning, run the command. */
+    public static void main(String... args)
+    {
+        CmdTDB.setLogging();
+        SetupTDB.setOptimizerWarningFlag(false);
+        new tdbloader2(args).mainRun();
+    }
+    
+    /** Registers the command-line arguments understood by tdbloader2 on top of CmdGeneral's. */
+    public tdbloader2(String... args)
+    {
+        super(args);
+        add(argLocation, "--loc", "Location");
+        add(argCompression, "--compression", "Use compression for intermediate files");
+        add(argBufferSize, "--buffer-size", "The size of buffers for IO in bytes");
+        add(argGzipOutside, "--gzip-outside", "GZIP...(Buffered...())");
+        add(argSpillSize, "--spill-size", "The size of spillable segments in tuples|records");
+        add(argNoStats, "--no-stats", "Do not generate the stats file");
+        add(argNoBuffer, "--no-buffer", "Do not use Buffered{Input|Output}Stream");
+    }
+        
+    /**
+     * Validates the command line: --loc is mandatory; the optional tuning flags override
+     * their static defaults; every positional data file must exist and have a recognized
+     * syntax. Finally builds the spill-threshold policy from the (possibly overridden) size.
+     */
+    @Override
+    protected void processModulesAndArgs()
+    {
+        if ( !contains(argLocation) ) {
+            throw new CmdException("Required: --loc DIR");
+        }
+        locationString = getValue(argLocation);
+        location = new Location(locationString);
+
+        // Optional flags; defaults are declared on the static fields.
+        compression = hasArg(argCompression);
+        gzip_outside = hasArg(argGzipOutside);
+        no_stats = hasArg(argNoStats);
+        no_buffer = hasArg(argNoBuffer);
+        if ( hasArg(argBufferSize) ) {
+            buffer_size = Integer.parseInt(getValue(argBufferSize));
+        }
+        if ( hasArg(argSpillSize) ) {
+            spill_size = Integer.parseInt(getValue(argSpillSize));
+        }
+
+        datafiles = getPositional();
+        for ( String filename : datafiles ) {
+            // Lang.guess() falls back to NQUADS, so null should not actually occur here.
+            if ( Lang.guess(filename, Lang.NQUADS) == null ) {
+                cmdError("File suffix not recognized: " + filename);
+            }
+            if ( !FileOps.exists(filename) ) {
+                cmdError("File does not exist: " + filename);
+            }
+        }
+
+        policy = new ThresholdPolicyCount<Tuple<Long>>(spill_size);
+    }
+    
+    /**
+     * Main load sequence: parse all input files into node-id tuples collected in sorted,
+     * spillable data bags; build the primary SPO/GSPO B+Tree indexes from the sorted tuples;
+     * derive the secondary indexes from the primaries; optionally write the stats file.
+     */
+    @Override
+    protected void exec()
+    {
+        // This formats the location correctly.
+        DatasetGraphTDB dsg = SetupTDB.buildDataset(location) ;
+        
+        // Indexes are built separately below, so close indexes and the prefix table.
+        dsg.getTripleTable().getNodeTupleTable().getTupleTable().close();
+        dsg.getQuadTable().getNodeTupleTable().getTupleTable().close();
+        // Later - attach prefix table to parser.
+        dsg.getPrefixes().close() ;
+        
+        ProgressLogger monitorTotal = new ProgressLogger(cmdLog, "tuples", BulkLoader.DataTickPoint,BulkLoader.superTick) ;
+
+        // Sorted, spillable bags of node-id tuples; sorted input is what the B+Tree packing needs.
+        DataBag<Tuple<Long>> outputTriples = new MultiThreadedSortedDataBag<Tuple<Long>>(policy, new TripleSerializationFactory(), comparator);
+        DataBag<Tuple<Long>> outputQuads = new MultiThreadedSortedDataBag<Tuple<Long>>(policy, new QuadSerializationFactory(), comparator);
+        
+        // Node table and input data using node ids (rather than RDF node values)
+        Sink<Quad> sink = new NodeTableBuilder2(dsg, monitorTotal, outputTriples, outputQuads) ; 
+        Sink<Triple> sink2 = new SinkExtendTriplesToQuads(sink) ;
+        
+        monitorTotal.start() ;
+        for ( String filename : datafiles )
+        {
+            // (The original guarded this log with "datafiles.size() > 0", which is
+            // always true inside this loop; log unconditionally.)
+            cmdLog.info("Load: "+filename+" -- "+Utils.nowAsString()) ;
+            
+            InputStream in = IO.openFile(filename) ;
+            try {
+                Lang lang = Lang.guess(filename, Lang.NQUADS) ;
+                if ( lang.isTriples() )
+                    RiotLoader.readTriples(in, lang, null, sink2) ;
+                else
+                    RiotLoader.readQuads(in, lang, null, sink) ;
+            } finally {
+                // Close the stream even if parsing throws (was "TODO: final {}" in the original).
+                IO.close(in) ;
+            }
+        }
+        sink.close() ;
+
+        // Primary indexes, built directly from the sorted tuple bags.
+        BPlusTree bptGSPO = createBPlusTreeIndex(Names.primaryIndexQuads, outputQuads) ;
+        BPlusTree bptSPO = createBPlusTreeIndex(Names.primaryIndexTriples, outputTriples) ;
+
+        // Secondary POS and OSP indexes
+        for ( String indexName : Names.tripleIndexes ) {
+        	if ( !indexName.equals(Names.primaryIndexTriples) ) {
+        		createBPlusTreeIndex(indexName, new ColumnMap(Names.primaryIndexTriples, indexName), bptSPO) ;
+        	}
+        }
+
+        // Secondary GPOS, GOSP, POSG and OSPG indexes
+        for ( String indexName : Names.quadIndexes ) {
+        	if ( !indexName.equals(Names.primaryIndexQuads) ) {
+        		createBPlusTreeIndex(indexName, new ColumnMap(Names.primaryIndexQuads, indexName), bptGSPO) ;
+        	}
+        }
+        
+        outputTriples.close() ;
+        outputQuads.close() ;
+        
+        if ( !no_stats ) {
+            if ( ! location.isMem() ) {
+                // Reopen the dataset so the stats writer sees the freshly built store.
+                dsg = SetupTDB.buildDataset(location) ;
+                Stats.write(dsg, ((NodeTableBuilder2)sink).getCollector()) ;
+            }            
+        }
+        
+        // ---- Monitor
+        print ( monitorTotal ) ;
+    }
+    
+    /** Logs a one-line summary (total ticks, elapsed seconds, throughput) for a finished monitor. */
+    private static void print ( ProgressLogger monitor ) {
+        long elapsedMillis = monitor.finish();
+        long ticks = monitor.getTicks();
+        float seconds = elapsedMillis / 1000F;
+        float rate = (seconds != 0) ? ticks / seconds : 0;
+        cmdLog.info(String.format("Total: %,d tuples : %,.2f seconds : %,.2f tuples/sec [%s]",
+                                  ticks, seconds, rate, nowAsString()));
+    }
+    
+    /**
+     * Builds the B+Tree index {@code indexName} by streaming the tuples from {@code tuples}
+     * through a tuple-to-record transform into {@code BPlusTreeRewriter.packIntoBPlusTree}.
+     * NOTE(review): packing presumably requires the tuples to iterate in index order —
+     * they come from a sorted data bag; confirm the bag's comparator matches this index.
+     *
+     * @param indexName 3-letter (triple) or 4-letter (quad) index name, e.g. "SPO" or "GSPO"
+     * @param tuples    node-id tuples, one Long per index column
+     * @return the synced B+Tree
+     */
+    private BPlusTree createBPlusTreeIndex(String indexName, DataBag<Tuple<Long>> tuples) {
+        deleteExistingBPlusTreeIndex(indexName) ;
+        
+        // The name length doubles as the tuple arity: 3 = triples, 4 = quads.
+    	final int size = indexName.length() ;
+
+    	if ( ( size != 3 ) && ( size != 4 ) ) throw new AtlasException("Unsupported size.") ;
+    	
+    	final RecordFactory recordFactory ;
+    	if ( size == 3 ) {
+    		recordFactory = new RecordFactory(SystemTDB.LenIndexTripleRecord, 0) ;
+    	} else {
+    		recordFactory = new RecordFactory(SystemTDB.LenIndexQuadRecord, 0) ;
+    	}
+    	
+        int order = BPlusTreeParams.calcOrder(SystemTDB.BlockSize, recordFactory) ;
+        BPlusTreeParams bptParams = new BPlusTreeParams(order, recordFactory) ;
+        
+        int readCacheSize = 10 ;
+        int writeCacheSize = 100 ;
+        
+        FileSet destination = new FileSet(location, indexName) ;
+        BlockMgr blkMgrNodes = BlockMgrFactory.create(destination, Names.bptExt1, SystemTDB.BlockSize, readCacheSize, writeCacheSize) ;
+        BlockMgr blkMgrRecords = BlockMgrFactory.create(destination, Names.bptExt2, SystemTDB.BlockSize, readCacheSize, writeCacheSize) ;
+        
+        cmdLog.info("Index: creating " + indexName + " index...") ;
+        final ProgressLogger monitor = new ProgressLogger(cmdLog, "records to " + indexName, BulkLoader.DataTickPoint,BulkLoader.superTick) ;
+        monitor.start() ;
+        
+        // Lazily converts each tuple into a fixed-width record: one 8-byte big slot per
+        // column, written at offset i * SizeOfLong; ticks the monitor per record.
+        Transform<Tuple<Long>, Record> transformTuple2Record = new Transform<Tuple<Long>, Record>() {
+			@Override public Record convert(Tuple<Long> tuple) {
+				Record record = recordFactory.create() ;
+				for ( int i = 0; i < size; i++ ) {
+					Bytes.setLong(tuple.get(i), record.getKey(), i * SystemTDB.SizeOfLong) ;					
+				}
+				monitor.tick() ;
+				return record ;
+			}
+        };
+        Iterator<Record> iter = Iter.iter(tuples.iterator()).map(transformTuple2Record) ;
+        BPlusTree bpt2 = BPlusTreeRewriter.packIntoBPlusTree(iter, bptParams, recordFactory, blkMgrNodes, blkMgrRecords) ;
+        bpt2.sync() ;
+
+        print ( monitor ) ;
+        
+        return bpt2 ;
+    }
+    
+    private void createBPlusTreeIndex(String indexName, final ColumnMap colMap, BPlusTree bpt) {
+        // Build a secondary index: permute the records of the primary B+Tree via
+        // colMap, re-sort them in a spillable bag and pack them into a new B+Tree.
+        final int size = indexName.length() ;
+
+        if ( ( size != 3 ) && ( size != 4 ) ) throw new AtlasException("Unsupported size.") ;
+
+        DataBag<Tuple<Long>> outTuples ;
+        if ( size == 3 ) {
+            outTuples = new MultiThreadedSortedDataBag<Tuple<Long>>(policy, new TripleSerializationFactory(), comparator);
+        } else {
+            outTuples = new MultiThreadedSortedDataBag<Tuple<Long>>(policy, new QuadSerializationFactory(), comparator);
+        }
+
+        // try/finally: previously close() was skipped on failure, leaking the
+        // bag's spill files if sorting or index construction threw.
+        try {
+            cmdLog.info("Index: sorting data for " + indexName + " index...") ;
+            final ProgressLogger monitor = new ProgressLogger(cmdLog, "records to " + indexName, BulkLoader.DataTickPoint,BulkLoader.superTick) ;
+            monitor.start() ;
+
+            // Reorder the slots of each primary-index record into the new column order.
+            Transform<Record, Tuple<Long>> transformTuple2Tuple = new Transform<Record, Tuple<Long>>() {
+                @Override public Tuple<Long> convert(Record record) {
+                    Long[] ids = new Long[size] ;
+                    for ( int i = 0 ; i < size ; i++ ) {
+                        ids[colMap.fetchSlotIdx(i)] = Bytes.getLong(record.getKey(), i*SizeOfLong) ;
+                    }
+                    monitor.tick() ;
+                    return Tuple.create(ids) ;
+                }
+            };
+            outTuples.addAll(Iter.iter(bpt.iterator()).map(transformTuple2Tuple).iterator()) ;
+
+            print ( monitor ) ;
+
+            createBPlusTreeIndex(indexName, outTuples) ;
+        } finally {
+            outTuples.close() ;
+        }
+    }
+    
+    private void deleteExistingBPlusTreeIndex(String indexName) {
+        // Remove any leftover B+Tree files for this index so a rebuild starts clean.
+        for ( String ext : new String[] { Names.bptExt1, Names.bptExt2 } ) {
+            FileOps.delete(location.absolute(indexName, ext)) ;
+        }
+    }
+
+    static class NodeTableBuilder implements Sink<Quad>
+    {
+        private NodeTable nodeTable ;
+        private DataBag<Tuple<Long>> outputTriples ;
+        private DataBag<Tuple<Long>> outputQuads ;
+        private ProgressLogger monitor ;
+        private StatsCollectorNodeId stats ;
+
+        NodeTableBuilder(DatasetGraphTDB dsg, ProgressLogger monitor, DataBag<Tuple<Long>> outputTriples, DataBag<Tuple<Long>> outputQuads)
+        {
+            this.monitor = monitor ;
+            NodeTupleTable ntt = dsg.getTripleTable().getNodeTupleTable() ; 
+            this.nodeTable = ntt.getNodeTable() ;
+            this.outputTriples = outputTriples ; 
+            this.outputQuads = outputQuads ; 
+            this.stats = new StatsCollectorNodeId() ;
+        }
+        
+        @Override
+        public void send(Quad quad)
+        {
+            Node s = quad.getSubject() ;
+            Node p = quad.getPredicate() ;
+            Node o = quad.getObject() ;
+            Node g = null ;
+            // Union graph?!
+            if ( ! quad.isTriple() && ! quad.isDefaultGraph() )
+                g = quad.getGraph() ;
+            
+            NodeId sId = nodeTable.getAllocateNodeId(s) ; 
+            NodeId pId = nodeTable.getAllocateNodeId(p) ;
+            NodeId oId = nodeTable.getAllocateNodeId(o) ;
+            
+            if ( g != null )
+            {
+                NodeId gId = nodeTable.getAllocateNodeId(g) ;
+                outputQuads.send(Tuple.create(gId.getId(), sId.getId(), pId.getId(), oId.getId())) ;
+                if ( !no_stats ) stats.record(gId, sId, pId, oId) ;
+            }
+            else
+            {
+                outputTriples.send(Tuple.create(sId.getId(), pId.getId(), oId.getId())) ;
+                if ( !no_stats ) stats.record(null, sId, pId, oId) ;
+            }
+            monitor.tick() ;
+        }
+
+        @Override
+        public void flush()
+        {
+            outputTriples.flush() ;
+            outputQuads.flush() ;
+            nodeTable.sync() ;
+        }
+
+        @Override
+        public void close() { 
+            flush() ;
+        }
+        
+        public StatsCollectorNodeId getCollector() { return stats ; }
+    }
+    
+    /**
+     * Alternative node table builder: instead of interning nodes on the fly it
+     * spills (node, statement-key) pairs into externally-sorted data bags and
+     * derives the node table, the id-tuples and the node2id B+Tree in three
+     * passes when close() is called.
+     */
+    static class NodeTableBuilder2 implements Sink<Quad>
+    {
+        private DatasetGraphTDB dsg ;
+        private ObjectFile objects ;        // nodes.dat: the serialized node bodies
+        private DataBag<Tuple<Long>> outputTriples ;
+        private DataBag<Tuple<Long>> outputQuads ;
+        private ProgressLogger monitor ;
+        private StatsCollectorNodeId stats ;
+        
+        private ThresholdPolicyCount<Pair<byte[], byte[]>> policy = new ThresholdPolicyCount<Pair<byte[], byte[]>>(spill_size) ;
+        private DataBag<Pair<byte[], byte[]>> sdb01 ;   // (serialized node, md5|slot), sorted by serialized node
+        private DataBag<Pair<byte[], byte[]>> sdb02 ;   // (md5, id|slot), sorted by statement key
+        private DataBag<Pair<byte[], byte[]>> sdb03 ;   // (node hash, packed id), input for the node2id B+Tree
+        
+        private MessageDigest digest ;
+
+        NodeTableBuilder2(DatasetGraphTDB dsg, ProgressLogger monitor, DataBag<Tuple<Long>> outputTriples, DataBag<Tuple<Long>> outputQuads)
+        {
+            this.dsg = dsg ;
+            this.monitor = monitor ;
+
+            String filename = new FileSet(dsg.getLocation(), Names.indexId2Node).filename(Names.extNodeData) ;
+            this.objects = FileFactory.createObjectFileDisk(filename) ; 
+            
+            this.outputTriples = outputTriples ; 
+            this.outputQuads = outputQuads ; 
+            this.stats = new StatsCollectorNodeId() ;
+            
+            this.sdb01 = new MultiThreadedSortedDataBag<Pair<byte[], byte[]>>(policy, new PairSerializationFactory(), new PairComparator());
+            this.sdb02 = new MultiThreadedSortedDataBag<Pair<byte[], byte[]>>(policy, new PairSerializationFactory(), new PairComparator());
+            this.sdb03 = new MultiThreadedSortedDataBag<Pair<byte[], byte[]>>(policy, new PairSerializationFactory(), new PairComparator());
+            
+            try {
+                this.digest = MessageDigest.getInstance("MD5") ;
+            } catch (NoSuchAlgorithmException e) {
+                throw new AtlasException(e) ;
+            }
+        }
+        
+        @Override
+        public void send(Quad quad)
+        {
+            Node s = quad.getSubject() ;
+            Node p = quad.getPredicate() ;
+            Node o = quad.getObject() ;
+            Node g = null ;
+            // Union graph?!
+            if ( ! quad.isTriple() && ! quad.isDefaultGraph() )
+                g = quad.getGraph() ;
+            
+            // The MD5 of the whole statement acts as a key to regroup the node ids
+            // of one statement after sorting; "|s", "|p", "|o", "|g" tag the slot.
+            try {
+                digest.reset() ;
+                digest.update(s.toString().getBytes("UTF-8")) ; // TODO: should we do something better here?
+                digest.update(p.toString().getBytes("UTF-8")) ;
+                digest.update(o.toString().getBytes("UTF-8")) ;
+                if ( g != null )
+                    digest.update(g.toString().getBytes("UTF-8")) ;
+
+                String md5 = new String(Hex.encodeHex(digest.digest())) ;
+                sdb01.add(new Pair<byte[], byte[]>(serialize(s).getBytes("UTF-8"), (md5 + "|s").getBytes("UTF-8"))) ;
+                sdb01.add(new Pair<byte[], byte[]>(serialize(p).getBytes("UTF-8"), (md5 + "|p").getBytes("UTF-8"))) ;
+                sdb01.add(new Pair<byte[], byte[]>(serialize(o).getBytes("UTF-8"), (md5 + "|o").getBytes("UTF-8"))) ;
+                if ( g != null )
+                    sdb01.add(new Pair<byte[], byte[]>(serialize(g).getBytes("UTF-8"), (md5 + "|g").getBytes("UTF-8"))) ;
+            } catch (UnsupportedEncodingException e) {
+                throw new AtlasException(e) ;
+            }
+
+            monitor.tick() ;
+        }
+
+        @Override
+        public void flush()
+        {
+            // Intentionally a no-op: all the work is deferred to close().
+            // TODO
+        }
+
+        @Override
+        public void close() { 
+            flush() ;
+            
+            try {
+                // Phase 1: sdb01 is sorted by serialized node, so duplicates are
+                // adjacent. Each distinct node is stored once in nodes.dat and its
+                // (hash, id) pair goes to sdb03; (md5, id|slot) pairs go to sdb02.
+                cmdLog.info("Node Table (1/3): building nodes.dat and sorting hash|id ...") ;
+                ProgressLogger monitor01 = new ProgressLogger(cmdLog, "records for node table (1/3) phase", BulkLoader.DataTickPoint,BulkLoader.superTick) ;
+                monitor01.start() ;
+                String curr = null ;
+                long id = -1L;
+                Iterator<Pair<byte[], byte[]>> iter01 = sdb01.iterator() ;
+                while ( iter01.hasNext() ) {
+                    Pair<byte[], byte[]> pair01 = iter01.next() ;
+                    String leftIn = new String(pair01.getLeft(), "UTF-8") ;
+                    String rightIn = new String(pair01.getRight(), "UTF-8") ;
+                    if ( ! leftIn.equals(curr) ) {
+                        curr = leftIn ;
+                        // generate the node id
+                        Node node = parse(leftIn) ;
+                        id = NodeLib.encodeStore(node, objects) ;
+                        // add to hash|id
+                        Hash hash = new Hash(SystemTDB.LenNodeHash);
+                        setHash(hash, node);
+                        sdb03.add (new Pair<byte[], byte[]>(hash.getBytes(), Bytes.packLong(id))) ;
+                    }
+                    String tokens[] = rightIn.split("\\|") ;
+                    String leftOut = tokens[0] ;
+                    String rightOut = id + "|" + tokens[1] ;
+                    Pair<byte[], byte[]> pair02 = new Pair<byte[], byte[]>(leftOut.getBytes("UTF-8"), rightOut.getBytes("UTF-8")) ;
+                    sdb02.add(pair02) ;
+                    monitor01.tick() ;
+                }
+                sdb01.close() ;
+                sdb01 = null ;
+                print ( monitor01 ) ;
+                
+                // Phase 2: sdb02 is sorted by statement key, so the ids of one
+                // statement are adjacent and can be reassembled into a tuple.
+                cmdLog.info("Node Table (2/3): generating input data using node ids...") ;
+                final ProgressLogger monitor02 = new ProgressLogger(cmdLog, "records for node table (2/3) phase", BulkLoader.DataTickPoint,BulkLoader.superTick) ;
+                monitor02.start() ;
+                Iterator<Pair<byte[], byte[]>> iter02 = sdb02.iterator() ;
+                curr = null ;
+                Long s = null ;
+                Long p = null ;
+                Long o = null ;
+                Long g = null ;
+                while ( iter02.hasNext() ) {
+                    Pair<byte[], byte[]> pair02 = iter02.next() ;
+                    String leftIn = new String(pair02.getLeft(), "UTF-8") ;
+                    String rightIn = new String(pair02.getRight(), "UTF-8") ;
+                    if ( curr == null ) curr = leftIn ;
+                    if ( ! leftIn.equals(curr) ) {
+                        curr = leftIn ;
+                        write (g, s, p, o) ;
+                        s = null ;
+                        p = null ;
+                        o = null ;
+                        g = null ;
+                        monitor02.tick() ;
+                    }
+                    String tokens[] = rightIn.split("\\|") ;
+                    if ( "s".equals(tokens[1]) ) s = Long.parseLong(tokens[0]) ;
+                    else if ( "p".equals(tokens[1]) ) p = Long.parseLong(tokens[0]) ;
+                    else if ( "o".equals(tokens[1]) ) o = Long.parseLong(tokens[0]) ;
+                    else if ( "g".equals(tokens[1]) ) g = Long.parseLong(tokens[0]) ;
+                    
+                }
+                // Guard against empty input: previously the trailing write() ran
+                // unconditionally, emitting a spurious all-null tuple and throwing
+                // NullPointerException in stats.record().
+                if ( curr != null )
+                    write (g, s, p, o) ; // ensure we write the last triple|quad
+                sdb02.close() ;
+                sdb02 = null ;
+                print ( monitor02 ) ;
+            } catch (UnsupportedEncodingException e) {
+                throw new AtlasException(e) ;
+            }
+            
+            // Phase 3: node table B+Tree index (i.e. node2id.dat/idn)
+            cmdLog.info("Node Table (3/3): building node table B+Tree index (i.e. node2id.dat and node2id.idn files)...") ;
+            final ProgressLogger monitor03 = new ProgressLogger(cmdLog, "records for node table (3/3) phase", BulkLoader.DataTickPoint,BulkLoader.superTick) ;
+            monitor03.start() ;
+            String path = dsg.getLocation().getDirectoryPath() ;
+            new File(path, "node2id.dat").delete() ;
+            new File(path, "node2id.idn").delete() ;
+            
+            final RecordFactory recordFactory = new RecordFactory(LenNodeHash, SizeOfNodeId) ;
+            // Each sdb03 entry becomes a (hash -> id) record; sdb03 is already sorted by hash.
+            Transform<Pair<byte[], byte[]>, Record> transformPair2Record = new Transform<Pair<byte[], byte[]>, Record>() {
+                @Override public Record convert(Pair<byte[], byte[]> pair) {
+                    monitor03.tick() ;
+                    return recordFactory.create(pair.getLeft(), pair.getRight()) ;
+                }
+            };
+
+            int order = BPlusTreeParams.calcOrder(SystemTDB.BlockSize, recordFactory) ;
+            BPlusTreeParams bptParams = new BPlusTreeParams(order, recordFactory) ;
+            int readCacheSize = 10 ;
+            int writeCacheSize = 100 ;
+            FileSet destination = new FileSet(dsg.getLocation(), Names.indexNode2Id) ;
+            BlockMgr blkMgrNodes = BlockMgrFactory.create(destination, Names.bptExt1, SystemTDB.BlockSize, readCacheSize, writeCacheSize) ;
+            BlockMgr blkMgrRecords = BlockMgrFactory.create(destination, Names.bptExt2, SystemTDB.BlockSize, readCacheSize, writeCacheSize) ;
+            Iterator<Record> iter2 = Iter.iter(sdb03.iterator()).map(transformPair2Record) ;
+            BPlusTree bpt2 = BPlusTreeRewriter.packIntoBPlusTree(iter2, bptParams, recordFactory, blkMgrNodes, blkMgrRecords) ;
+            bpt2.sync() ;
+            sdb03.close() ;
+            sdb03 = null ;
+            print ( monitor03 ) ;
+
+            outputTriples.flush() ;
+            outputQuads.flush() ;
+            objects.sync() ;
+        }
+        
+        public StatsCollectorNodeId getCollector() { return stats ; }
+        
+        // Emit one reassembled statement; g == null means "triple".
+        private void write (Long g, Long s, Long p, Long o) {
+            if ( g != null ) {
+                outputQuads.add(Tuple.create(g, s, p, o)) ;
+                // Guarded for consistency with NodeTableBuilder: stats are only
+                // consumed when !no_stats.
+                if ( !no_stats ) stats.record(new NodeId(g), new NodeId(s), new NodeId(p), new NodeId(o)) ;
+            } else {
+                outputTriples.add(Tuple.create(s, p, o)) ;
+                if ( !no_stats ) stats.record(null, new NodeId(s), new NodeId(p), new NodeId(o)) ;
+            }
+        }
+    }
+
+    // Blank-node labels are scoped per document so serialize/parse round-trips
+    // use consistent labels across the whole run.
+    public static final NodeToLabel nodeToLabel = NodeToLabel.createScopeByDocument();
+    /** Render a single node in its output (N-Quads token) form. */
+    public static String serialize(Node node) {
+        StringWriter buffer = new StringWriter();
+        OutputLangUtils.output(buffer, node, null, nodeToLabel);
+        return buffer.toString();
+    }
+    
+    /** Re-parse a node previously written by serialize(); expects exactly one token. */
+    private static Node parse(String string) {
+        ParserProfile profile = RiotLib.profile(Lang.NQUADS, null, null) ;
+        Tokenizer tokenizer = TokenizerFactory.makeTokenizerString(string) ;
+        if ( ! tokenizer.hasNext() )
+            return null ;
+        Node node = profile.create(null, tokenizer.next()) ;
+        if ( tokenizer.hasNext() )
+            Log.warn(RiotLib.class, "String has more than one token in it: "+string) ;
+        return node ;
+    }
+    
+    @Override
+    protected String getSummary()
+    {
+        // One-line usage string shown in the command help.
+        return String.format("%s --loc=DIR FILE ...", getCommandName()) ;
+    }
+
+    @Override
+    protected String getCommandName()
+    {
+        // Report the fully qualified class name as the command name.
+        return getClass().getName() ;
+    }
+
+    class TripleSerializationFactory implements SerializationFactory<Tuple<Long>> {
+        @Override public Iterator<Tuple<Long>> createDeserializer(InputStream in) { return new TupleInputStream(in, 3); }
+        @Override public Sink<Tuple<Long>> createSerializer(OutputStream out) { return new TupleOutputStream(out); }
+        @Override public long getEstimatedMemorySize(Tuple<Long> item) { throw new AtlasException("Method not implemented.") ; }
+    }
+
+    class QuadSerializationFactory implements SerializationFactory<Tuple<Long>> {
+        @Override public Iterator<Tuple<Long>> createDeserializer(InputStream in) { return new TupleInputStream(in, 4); }
+        @Override public Sink<Tuple<Long>> createSerializer(OutputStream out) { return new TupleOutputStream(out); }
+        @Override public long getEstimatedMemorySize(Tuple<Long> item) { throw new AtlasException("Method not implemented.") ; }
+    }
+
+    /**
+     * Lexicographic order over equal-length tuples of node ids.
+     * Made static: it uses no state from the enclosing command, matching
+     * PairComparator and avoiding a hidden enclosing-instance reference.
+     */
+    static class TupleComparator implements Comparator<Tuple<Long>> {
+        @Override
+        public int compare(Tuple<Long> t1, Tuple<Long> t2) {
+            int size = t1.size();
+            // Triples and quads must never be mixed in the same bag.
+            if ( size != t2.size() ) throw new AtlasException("Cannot compare tuple of different sizes.") ;
+            for ( int i = 0; i < size; i++ ) {
+                int result = t1.get(i).compareTo(t2.get(i)) ;
+                if ( result != 0 ) {
+                    return result ;
+                }
+            }
+            return 0;
+        }
+    }
+    
+    /**
+     * Deserializes fixed-arity tuples of longs from a spill file.
+     * Made static: it uses only the static stream helpers, so it needs no
+     * hidden reference to the enclosing command instance.
+     */
+    static class TupleInputStream implements Iterator<Tuple<Long>>, Closeable {
+
+        private DataInputStream in ;
+        private int size ;
+        private Tuple<Long> slot = null ;   // one-item lookahead; null => exhausted
+        
+        public TupleInputStream(InputStream in, int size) {
+            this.in = createDataInputStream(in) ;
+            this.size = size ;
+            slot = readNext() ;
+        }
+
+        @Override
+        public boolean hasNext() {
+            return slot != null ;
+        }
+
+        @Override
+        public Tuple<Long> next() {
+            // Honour the Iterator contract instead of returning null past the end.
+            if ( slot == null )
+                throw new java.util.NoSuchElementException() ;
+            Tuple<Long> result = slot ;
+            slot = readNext() ;
+            return result ;
+        }
+        
+        private Tuple<Long> readNext() {
+            // NOTE(review): any IOException (not just a clean EOF) ends iteration
+            // silently, so a truncated spill file is indistinguishable from a
+            // normal end of stream — confirm this is acceptable.
+            try {
+                if ( size == 3 ) {
+                    return Tuple.create(in.readLong(), in.readLong(), in.readLong()) ;
+                } else if ( size == 4 ) {
+                    return Tuple.create(in.readLong(), in.readLong(), in.readLong(), in.readLong()) ;
+                } else {
+                    throw new AtlasException("Unsupported size.") ;
+                }
+            } catch (IOException e) {
+                return null ;
+            }
+        }
+
+        @Override
+        public void remove() {
+            throw new AtlasException("Method not implemented.") ;
+        }
+
+        @Override
+        public void close() {
+            try {
+                in.close() ;
+            } catch (IOException e) {
+                // Bug fix: the exception was constructed but never thrown.
+                throw new AtlasException(e) ;
+            }        
+        }
+        
+    }
+    
+    /**
+     * Serializes tuples of longs to a spill file, one long per slot.
+     * Made static: it uses only the static stream helpers, so it needs no
+     * hidden reference to the enclosing command instance.
+     */
+    static class TupleOutputStream implements Sink<Tuple<Long>> {
+
+        private DataOutputStream out ;
+        
+        public TupleOutputStream(OutputStream out) {
+            this.out = createDataOutputStream(out) ;
+        }
+
+        @Override
+        public void send(Tuple<Long> tuple) {
+            Iterator<Long> iter = tuple.iterator() ;
+            while ( iter.hasNext() ) {
+                try {
+                    out.writeLong( iter.next() ) ;
+                } catch (IOException e) {
+                    // Bug fix: the exception was constructed but never thrown,
+                    // silently dropping write failures (and data).
+                    throw new AtlasException(e) ;
+                }
+            }
+        }
+
+        @Override
+        public void flush() {
+            try {
+                out.flush() ;
+            } catch (IOException e) {
+                throw new AtlasException(e) ;  // was created but never thrown
+            }
+        }
+
+        @Override
+        public void close() {
+            try {
+                out.close() ;
+            } catch (IOException e) {
+                throw new AtlasException(e) ;  // was created but never thrown
+            }
+        }
+        
+    }
+
+    static class PairSerializationFactory implements SerializationFactory<Pair<byte[], byte[]>> {
+        @Override public Iterator<Pair<byte[], byte[]>> createDeserializer(InputStream in) { return new PairInputStream(in); }
+        @Override public Sink<Pair<byte[], byte[]>> createSerializer(OutputStream out) { return new PairOutputStream(out); }
+        @Override public long getEstimatedMemorySize(Pair<byte[], byte[]> item) { throw new AtlasException("Method not implemented.") ; }
+    }
+
+    static class PairComparator implements Comparator<Pair<byte[], byte[]>> {
+        @Override
+        public int compare(Pair<byte[], byte[]> p1, Pair<byte[], byte[]> p2) {
+            return Bytes.compare(p1.getLeft(), p2.getLeft()) ;
+        }
+    }
+
+    /**
+     * Deserializes (byte[] key, byte[] value) pairs from a spill file; each
+     * array is preceded by its length as an int.
+     */
+    static class PairInputStream implements Iterator<Pair<byte[], byte[]>>, Closeable {
+
+        private DataInputStream in ;
+        private Pair<byte[], byte[]> slot = null ;  // one-item lookahead; null => exhausted
+        
+        public PairInputStream(InputStream in) {
+            this.in = createDataInputStream(in) ;
+            slot = readNext() ;
+        }
+
+        @Override
+        public boolean hasNext() {
+            return slot != null ;
+        }
+
+        @Override
+        public Pair<byte[], byte[]> next() {
+            // Honour the Iterator contract instead of returning null past the end.
+            if ( slot == null )
+                throw new java.util.NoSuchElementException() ;
+            Pair<byte[], byte[]> result = slot ;
+            slot = readNext() ;
+            return result ;
+        }
+        
+        private Pair<byte[], byte[]> readNext() {
+            // NOTE(review): any IOException (not just a clean EOF) ends iteration
+            // silently, so a truncated spill file is read as if it ended normally.
+            try {
+                byte left[] = new byte[in.readInt()] ;
+                in.readFully(left) ;
+                byte right[] = new byte[in.readInt()] ;
+                in.readFully(right) ;
+                return new Pair<byte[], byte[]> (left, right) ;
+            } catch (IOException e) {
+                return null ;
+            }
+        }
+
+        @Override
+        public void remove() {
+            throw new AtlasException("Method not implemented.") ;
+        }
+
+        @Override
+        public void close() {
+            try {
+                in.close() ;
+            } catch (IOException e) {
+                // Bug fix: the exception was constructed but never thrown.
+                throw new AtlasException(e) ;
+            }        
+        }
+        
+    }
+    
+    /**
+     * Serializes (byte[] key, byte[] value) pairs to a spill file; each array
+     * is written as an int length followed by the raw bytes.
+     */
+    static class PairOutputStream implements Sink<Pair<byte[], byte[]>> {
+
+        private DataOutputStream out ;
+        
+        public PairOutputStream(OutputStream out) {
+            this.out = createDataOutputStream(out) ;
+        }
+
+        @Override
+        public void send(Pair<byte[], byte[]> pair) {
+            try {
+                out.writeInt(pair.getLeft().length) ;
+                out.write(pair.getLeft()) ;
+                out.writeInt(pair.getRight().length) ;
+                out.write(pair.getRight()) ;
+            } catch (IOException e) {
+                // Bug fix: the exception was constructed but never thrown,
+                // silently dropping write failures (and data).
+                throw new AtlasException(e) ;
+            }
+        }
+
+        @Override
+        public void flush() {
+            try {
+                out.flush() ;
+            } catch (IOException e) {
+                throw new AtlasException(e) ;  // was created but never thrown
+            }
+        }
+
+        @Override
+        public void close() {
+            try {
+                out.close() ;
+            } catch (IOException e) {
+                throw new AtlasException(e) ;  // was created but never thrown
+            }
+        }
+        
+    }
+
+    // Wrap the raw stream for writing according to the buffering/compression flags.
+    private static DataOutputStream createDataOutputStream(OutputStream out) {
+        try {
+            OutputStream wrapped ;
+            if ( no_buffer ) {
+                wrapped = compression ? new GZIPOutputStream(out) : out ;
+            } else if ( !compression ) {
+                wrapped = new BufferedOutputStream(out, buffer_size) ;
+            } else if ( gzip_outside ) {
+                wrapped = new GZIPOutputStream(new BufferedOutputStream(out, buffer_size)) ;
+            } else {
+                wrapped = new BufferedOutputStream(new GZIPOutputStream(out, buffer_size)) ;
+            }
+            return new DataOutputStream(wrapped) ;
+        } catch (IOException e) {
+            throw new AtlasException(e) ;
+        }
+    }
+    
+    // Mirror of createDataOutputStream: same flag-driven wrapping for reading.
+    private static DataInputStream createDataInputStream(InputStream in) {
+        try {
+            InputStream wrapped ;
+            if ( no_buffer ) {
+                wrapped = compression ? new GZIPInputStream(in) : in ;
+            } else if ( !compression ) {
+                wrapped = new BufferedInputStream(in, buffer_size) ;
+            } else if ( gzip_outside ) {
+                wrapped = new GZIPInputStream(new BufferedInputStream(in, buffer_size)) ;
+            } else {
+                wrapped = new BufferedInputStream(new GZIPInputStream(in, buffer_size)) ;
+            }
+            return new DataInputStream(wrapped) ;
+        } catch (IOException e) {
+            throw new AtlasException(e) ;
+        }
+    }
+    
+}
+

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/cmd/tdbloader2.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java?rev=1174559&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java Fri Sep 23 07:09:06 2011
@@ -0,0 +1,360 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.tdbloader2;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.PriorityQueue;
+
+import org.openjena.atlas.AtlasException;
+import org.openjena.atlas.data.AbstractDataBag;
+import org.openjena.atlas.data.SerializationFactory;
+import org.openjena.atlas.data.ThresholdPolicy;
+import org.openjena.atlas.iterator.Iter;
+import org.openjena.atlas.iterator.IteratorResourceClosing;
+import org.openjena.atlas.lib.Closeable;
+import org.openjena.atlas.lib.Sink;
+
+/**
+ * A sorted data bag that off-loads the sort-and-spill work to background
+ * threads: when the memory threshold is exceeded, the in-memory items are
+ * snapshotted into an array and handed to a {@link Spiller} thread, which
+ * sorts them and writes them to a spill file while the caller keeps adding
+ * items.
+ *
+ * This class is NOT safe for concurrent callers; only the spilling itself
+ * is performed on background threads. The spiller threads are joined before
+ * any spill file is read back or deleted.
+ */
+public class MultiThreadedSortedDataBag<E> extends AbstractDataBag<E>
+{
+    private final ThresholdPolicy<E> policy;
+    private final SerializationFactory<E> serializationFactory;
+    private final Comparator<? super E> comparator;
+    
+    // Background threads which may still be writing spill files.  They must
+    // all be joined before a spill file is opened for reading or deleted,
+    // otherwise a reader may see a truncated file.
+    private final List<Thread> spillers = new ArrayList<Thread>();
+    
+    protected boolean finishedAdding = false;
+    protected boolean spilled = false;
+    protected boolean closed = false;
+    
+    public MultiThreadedSortedDataBag(ThresholdPolicy<E> policy, SerializationFactory<E> serializerFactory, Comparator<? super E> comparator)
+    {
+        this.policy = policy;
+        this.serializationFactory = serializerFactory;
+        this.comparator = comparator;
+    }
+    
+    /** @throws AtlasException if the bag has already been closed */
+    protected void checkClosed()
+    {
+        if (closed) throw new AtlasException("SortedDataBag is closed, no operations can be performed on it.") ;
+    }
+    
+    public boolean isSorted()
+    {
+        return true;
+    }
+
+    public boolean isDistinct()
+    {
+        return false;
+    }
+
+    /**
+     * Adds an item to the bag, spilling to disk first if the threshold policy
+     * has been exceeded.  Must not be called once iteration has begun.
+     */
+    public void add(E item)
+    {
+        checkClosed();
+        if (finishedAdding)
+            throw new AtlasException("SortedDataBag: Cannot add any more items after the writing phase is complete.");
+        
+        if (policy.isThresholdExceeded())
+        {
+            spill();
+        }
+        
+        if (memory.add(item))
+        {
+            policy.increment(item);
+            size++;
+        }
+    }
+    
+    /**
+     * Snapshots the in-memory items and hands them to a background thread to
+     * sort and write to a spill file; the in-memory store is then cleared so
+     * that adding can continue immediately.
+     */
+    private void spill()
+    {
+        // Make sure we have something to spill.
+        if (memory.size() > 0)
+        {
+            OutputStream out;
+            try
+            {
+                out = getSpillFile();
+            }
+            catch (IOException e)
+            {
+                throw new AtlasException(e);
+            }
+            
+            // Sort the tuples
+            // Collections.sort() will copy to an array, sort, and then copy back.  Avoid that
+            // extra copy by copying to an array and using Arrays.sort().  Also it lets us use
+            // Collection<E> instead of List<E> as the type for the memory object.  Unfortunately
+            // because of Java's crazy generics we have to do it as an Object array.
+            Object[] array = memory.toArray();
+            Sink<E> serializer = serializationFactory.createSerializer(out);
+
+            // Sort and write on a background thread; remember the thread so it
+            // can be joined before the spill file is read (see iterator()).
+            Thread spiller = new Spiller(array, serializer);
+            spillers.add(spiller);
+            spiller.start();
+
+            spilled = true;
+            policy.reset();
+            memory.clear();
+        }
+    }
+
+    /**
+     * Waits for all outstanding spiller threads to finish writing, so that
+     * every spill file is complete before it is read or deleted.
+     */
+    private void waitForSpillers()
+    {
+        for (Thread spiller : spillers)
+        {
+            try
+            {
+                spiller.join();
+            }
+            catch (InterruptedException e)
+            {
+                Thread.currentThread().interrupt(); // preserve the interrupt status
+                throw new AtlasException("Interrupted while waiting for a spill thread to finish", e);
+            }
+        }
+        spillers.clear();
+    }
+
+    /** Sorts and serializes one snapshot of the in-memory items to a spill file. */
+    class Spiller extends Thread {
+        
+        private Object[] array ;
+        private Sink<E> serializer ;
+        
+        public Spiller(Object[] array, Sink<E> serializer) {
+            this.array = array ;
+            this.serializer = serializer ;
+        }
+        
+        @SuppressWarnings("unchecked")
+        @Override
+        public void run() {
+            Arrays.sort(array, (Comparator)comparator);
+            try
+            {
+                for (Object tuple : array)
+                {
+                    serializer.send((E)tuple);
+                }
+            }
+            finally
+            {
+                // Always close so the spill file is flushed even on failure.
+                serializer.close();
+            }
+        }
+        
+    }
+    
+    /** Forces any in-memory items out to a spill file (asynchronously). */
+    public void flush()
+    {
+        spill();
+    }
+
+    /**
+     * Returns an iterator over a set of elements of type E.  If you do not exhaust
+     * the iterator, you should call {@link org.openjena.atlas.iterator.Iter#close(Iterator)}
+     * to be sure any open file handles are closed.
+     * 
+     * @return an Iterator
+     */
+    @SuppressWarnings({ "unchecked" })
+    public Iterator<E> iterator()
+    {
+        checkClosed();
+        
+        int memSize = memory.size();
+        
+        // Constructing an iterator from this class is not thread-safe (just like all the the other methods)
+        if (!finishedAdding && memSize > 1)
+        {
+            // Again, some ugliness for speed
+            Object[] array = memory.toArray();
+            Arrays.sort(array, (Comparator)comparator);
+            memory = Arrays.asList((E[])array);
+        }
+        
+        finishedAdding = true;
+        
+        if (spilled)
+        {
+            // The spill files are written asynchronously: make sure all the
+            // spiller threads have finished before reading the files back.
+            waitForSpillers();
+            
+            List<Iterator<E>> inputs = new ArrayList<Iterator<E>>(spillFiles.size() + (memSize > 0 ? 1 : 0));
+                        
+            if (memSize > 0)
+            {
+                inputs.add(memory.iterator());
+            }
+            
+            for (File spillFile : spillFiles)
+            {
+                try
+                {
+                    InputStream in = new BufferedInputStream(new FileInputStream(spillFile));
+                    
+                    Iterator<E> deserializer = serializationFactory.createDeserializer(in) ;
+                    IteratorResourceClosing<E> irc = new IteratorResourceClosing<E>(deserializer, in);
+                    inputs.add(irc);
+                }
+                catch (FileNotFoundException e)
+                {
+                    // Close any open streams before we throw an exception
+                    for (Iterator<E> it : inputs)
+                    {
+                        Iter.close(it);
+                    }
+                    
+                    throw new AtlasException("Cannot find one of the spill files", e);
+                }
+            }
+            
+            SpillSortIterator<E> ssi = new SpillSortIterator<E>(inputs, comparator);
+            registerCloseableIterator(ssi);
+            
+            return ssi;
+        }
+        else
+        {
+            if (memSize > 0)
+            {
+                return memory.iterator();
+            }
+            else
+            {
+                return Iter.nullIterator();
+            }
+        }
+    }
+    
+    /** Closes the bag: waits for spillers, closes iterators, deletes spill files. */
+    public void close()
+    {
+        if (!closed)
+        {
+            // Do not delete spill files while a spiller thread may still be
+            // writing to them.
+            waitForSpillers();
+            closeIterators();
+            deleteSpillFiles();
+            
+            memory = null;
+            closed = true;
+        }
+    }
+    
+    /**
+     * An iterator that merges the pre-sorted inputs (the sorted in-memory items
+     * and the spill files) via a min-heap, yielding items in sorted order.
+     */
+    private class SpillSortIterator<T> implements Iterator<T>, Closeable
+    {
+        private final List<Iterator<T>> inputs;
+        private final Comparator<? super T> comp;
+        private final PriorityQueue<Item<T>> minHeap;
+        
+        public SpillSortIterator(List<Iterator<T>> inputs, Comparator<? super T> comp)
+        {
+            this.inputs = inputs;
+            this.comp = comp;
+            this.minHeap = new PriorityQueue<Item<T>>(inputs.size());
+            
+            // Prime the heap
+            for (int i=0; i<inputs.size(); i++)
+            {
+                replaceItem(i);
+            }
+        }
+        
+        /** Pulls the next tuple (if any) from input {@code index} into the heap. */
+        private void replaceItem(int index)
+        {
+            Iterator<T> it = inputs.get(index);
+            if (it.hasNext())
+            {
+                T tuple = it.next();
+                minHeap.add(new Item<T>(index, tuple, comp));
+            }
+        }
+
+        public boolean hasNext()
+        {
+            return (minHeap.peek() != null);
+        }
+
+        public T next()
+        {
+            if (!hasNext())
+            {
+                throw new NoSuchElementException();
+            }
+            
+            Item<T> curr = minHeap.poll();
+            // Read replacement item from the input the minimum came from
+            replaceItem(curr.getIndex());
+            
+            return curr.getTuple();
+        }
+
+        public void remove()
+        {
+            throw new UnsupportedOperationException("SpillSortIterator.remove");
+        }
+
+        public void close()
+        {
+            for (Iterator<T> it : inputs)
+            {
+                Iter.close(it);
+            }
+        }
+        
+        /** Heap entry: a tuple plus the index of the input it was read from. */
+        private final class Item<U> implements Comparable<Item<U>>
+        {
+            private final int index;
+            private final U tuple;
+            private final Comparator<? super U> c;
+            
+            public Item(int index, U tuple, Comparator<? super U> c)
+            {
+                this.index = index;
+                this.tuple = tuple;
+                this.c = c;
+            }
+            
+            public int getIndex()
+            {
+                return index;
+            }
+            
+            public U getTuple()
+            {
+                return tuple;
+            }
+            
+            @SuppressWarnings("unchecked")
+            public int compareTo(Item<U> o)
+            {
+                // Fall back to natural ordering when no comparator was supplied.
+                return (null != c) ? c.compare(tuple, o.getTuple()) : ((Comparable<U>)tuple).compareTo(o.getTuple());
+            }
+            
+            @SuppressWarnings("unchecked")
+            @Override
+            public boolean equals(Object obj)
+            {
+                if (obj instanceof Item)
+                {
+                    return compareTo((Item<U>)obj) == 0;
+                }
+                
+                return false;
+            }
+            
+            @Override
+            public int hashCode()
+            {
+                return tuple.hashCode();
+            }
+        }
+        
+    }
+
+}

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/MultiThreadedSortedDataBag.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/ProgressLogger.java
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/ProgressLogger.java?rev=1174559&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/ProgressLogger.java (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/ProgressLogger.java Fri Sep 23 07:09:06 2011
@@ -0,0 +1,127 @@
+/*
+ * (c) Copyright 2010 Epimorphics Ltd.
+ * All rights reserved.
+ * [See end of file]
+ */
+
+package org.apache.jena.tdbloader2;
+
+import static com.hp.hpl.jena.sparql.util.Utils.nowAsString ;
+import org.slf4j.Logger ;
+
+import com.hp.hpl.jena.sparql.util.Timer ;
+
+/** Progress monitor */
+/**
+ * Progress monitor: counts ticks and logs a rate message every tickPoint
+ * ticks, plus an elapsed-time message every superTick tickPoints.
+ */
+public class ProgressLogger
+{
+    // DELETE when an ARQ update sweeps through.
+    private final Logger log ;
+    private final long tickPoint ;
+    private final int superTick ;
+    private final Timer timer ;
+    private final String label ;
+    
+    private long counterBatch = 0 ;
+    private long counterTotal = 0 ;
+    
+    // Timer reading (milliseconds) at the end of the previous batch.
+    private long lastTime = 0 ;
+    
+    /**
+     * @param log       destination logger; may be null to disable output
+     * @param label     text describing the items being counted
+     * @param tickPoint number of ticks between progress messages
+     * @param superTick number of tickPoints between elapsed-time messages
+     */
+    public ProgressLogger(Logger log, String label, long tickPoint, int superTick)
+    {
+        this.log = log ;
+        this.label = label ;
+        this.tickPoint = tickPoint ;
+        this.superTick = superTick ;
+        this.timer = new Timer() ;
+    }
+    
+    /** Starts (or restarts) the timer. */
+    public void start()
+    {
+        timer.startTimer() ;
+        lastTime = 0 ;
+    }
+
+    /** Stops the timer and returns the total elapsed time in milliseconds. */
+    public long finish()
+    {
+        long totalTime = timer.endTimer() ;
+        return totalTime ;
+    }
+    
+    /** Returns the total number of ticks recorded so far. */
+    public long getTicks()
+    {
+        return counterTotal ;
+    }
+    
+    /** Records one item; logs batch and running average rates at each tickPoint. */
+    public void tick()
+    {
+        counterBatch++ ;
+        counterTotal++ ;
+    
+        if ( tickPoint(counterTotal, tickPoint) )
+        {
+            long timePoint = timer.readTimer() ;
+            long thisTime = timePoint - lastTime ;
+        
+            // *1000L is milli to second conversion
+            // Guard against a zero interval (the timer has millisecond
+            // resolution) to avoid division by zero on very fast batches.
+            if ( thisTime == 0 ) thisTime = 1 ;
+            if ( timePoint == 0 ) timePoint = 1 ;
+        
+            long batchAvgRate = (counterBatch * 1000L) / thisTime;
+            long runAvgRate   = (counterTotal * 1000L) / timePoint ;
+            print("Add: %,d %s (Batch: %,d / Avg: %,d)", counterTotal, label, batchAvgRate, runAvgRate) ;
+
+            if ( tickPoint(counterTotal, superTick*tickPoint) )
+                elapsed(timePoint) ;
+            counterBatch = 0 ;
+            lastTime = timePoint ;
+        }
+    }
+    
+    /** Logs the total elapsed time and the current wall-clock time. */
+    private void elapsed(long timerReading)
+    {
+        float elapsedSecs = timerReading/1000F ;
+        print("  Elapsed: %,.2f seconds [%s]", elapsedSecs, nowAsString()) ;
+    }
+    
+    /** Formats and logs at INFO level; a no-op when the logger is null or disabled. */
+    private void print(String fmt, Object...args)
+    {
+        if ( log != null && log.isInfoEnabled() )
+        {
+            String str = String.format(fmt, args) ;
+            log.info(str) ;
+        }
+    }
+    
+    /** True when counter is an exact multiple of quantum. */
+    static boolean tickPoint(long counter, long quantum)
+    {
+        return counter%quantum == 0 ;
+    }
+
+}
+/*
+ * (c) Copyright 2010 Epimorphics Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
\ No newline at end of file

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/main/java/org/apache/jena/tdbloader2/ProgressLogger.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/log4j.properties
URL: http://svn.apache.org/viewvc/incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/log4j.properties?rev=1174559&view=auto
==============================================================================
--- incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/log4j.properties (added)
+++ incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/log4j.properties Fri Sep 23 07:09:06 2011
@@ -0,0 +1,12 @@
+## Default: INFO to the console appender below.
+log4j.rootLogger=INFO, stdlog
+
+## Console appender with a compact timestamped pattern.
+log4j.appender.stdlog=org.apache.log4j.ConsoleAppender
+## log4j.appender.stdlog.target=System.err
+log4j.appender.stdlog.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdlog.layout.ConversionPattern=%d{HH:mm:ss} %-5p %-25c{1} :: %m%n
+
+## Silence noisy Hadoop and Jetty loggers during tests.
+log4j.logger.org.apache.hadoop.metrics2=ERROR
+log4j.logger.org.apache.hadoop=ERROR
+log4j.logger.org.mortbay=ERROR
+
+## Verbose output from the loader itself.
+log4j.logger.org.apache.jena.tdbloader2=DEBUG

Propchange: incubator/jena/Scratch/PC/tdbloader2/trunk/src/test/resources/log4j.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain