You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@marmotta.apache.org by ss...@apache.org on 2013/11/06 22:04:13 UTC

git commit: add support for bzip2 to KiWiLoader

Updated Branches:
  refs/heads/develop a91b05893 -> 091ee0d9f


add support for bzip2 to KiWiLoader


Project: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/commit/091ee0d9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/tree/091ee0d9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/diff/091ee0d9

Branch: refs/heads/develop
Commit: 091ee0d9feaec667a0eea50008acbbe1ccf9d6bc
Parents: a91b058
Author: Sebastian Schaffert <ss...@apache.org>
Authored: Wed Nov 6 22:04:07 2013 +0100
Committer: Sebastian Schaffert <ss...@apache.org>
Committed: Wed Nov 6 22:04:07 2013 +0100

----------------------------------------------------------------------
 libraries/kiwi/kiwi-loader/pom.xml              | 39 +++++++++++---------
 .../marmotta/kiwi/loader/KiWiHandler.java       |  5 +++
 .../apache/marmotta/kiwi/loader/KiWiLoader.java | 26 +++++++++----
 .../marmotta/kiwi/sail/KiWiValueFactory.java    |  3 --
 4 files changed, 46 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/091ee0d9/libraries/kiwi/kiwi-loader/pom.xml
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-loader/pom.xml b/libraries/kiwi/kiwi-loader/pom.xml
index c35d6e6..59e225a 100644
--- a/libraries/kiwi/kiwi-loader/pom.xml
+++ b/libraries/kiwi/kiwi-loader/pom.xml
@@ -69,6 +69,12 @@
     </build>
 
     <dependencies>
+
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-compress</artifactId>
+            <version>1.4.1</version>
+        </dependency>
         <dependency>
             <groupId>org.apache.marmotta</groupId>
             <artifactId>kiwi-triplestore</artifactId>
@@ -83,6 +89,20 @@
         </dependency>
 
         <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>jcl-over-slf4j</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>log4j-over-slf4j</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+
+
+        <dependency>
             <groupId>com.h2database</groupId>
             <artifactId>h2</artifactId>
         </dependency>
@@ -106,11 +126,6 @@
             <artifactId>commons-configuration</artifactId>
         </dependency>
         <dependency>
-            <groupId>commons-logging</groupId>
-            <artifactId>commons-logging</artifactId>
-            <version>1.1.3</version>
-        </dependency>
-        <dependency>
             <groupId>ch.qos.logback</groupId>
             <artifactId>logback-classic</artifactId>
         </dependency>
@@ -127,11 +142,6 @@
         </dependency>
         <dependency>
             <groupId>org.openrdf.sesame</groupId>
-            <artifactId>sesame-rio-ntriples</artifactId>
-            <scope>runtime</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.openrdf.sesame</groupId>
             <artifactId>sesame-rio-n3</artifactId>
             <scope>runtime</scope>
         </dependency>
@@ -155,11 +165,6 @@
             <artifactId>sesame-rio-trig</artifactId>
             <scope>runtime</scope>
         </dependency>
-        <dependency>
-            <groupId>org.openrdf.sesame</groupId>
-            <artifactId>sesame-rio-trig</artifactId>
-            <scope>runtime</scope>
-        </dependency>
 
         <dependency>
             <groupId>org.apache.marmotta</groupId>
@@ -168,8 +173,8 @@
             <type>test-jar</type>
         </dependency>
         <dependency>
-            <!-- License: CPL-1.0 http://www.opensource.org/licenses/cpl1.0 
-                As a MAVEN dependency it is not included in a source release. As a TEST dependency 
+            <!-- License: CPL-1.0 http://www.opensource.org/licenses/cpl1.0
+                As a MAVEN dependency it is not included in a source release. As a TEST dependency
                 it is not included in a BINARY release, so no need to mention in N&L. -->
             <groupId>com.github.stefanbirkner</groupId>
             <artifactId>system-rules</artifactId>

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/091ee0d9/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
index f89fd35..c2ecc7a 100644
--- a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
+++ b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
@@ -43,6 +43,7 @@ public class KiWiHandler implements RDFHandler {
 
     long count = 0;
     long start = 0;
+    long previous = 0;
 
     private KiWiLoaderConfiguration config;
 
@@ -122,6 +123,7 @@ public class KiWiHandler implements RDFHandler {
         }
 
         this.start = System.currentTimeMillis();
+        this.previous = System.currentTimeMillis();
     }
 
     /**
@@ -176,6 +178,9 @@ public class KiWiHandler implements RDFHandler {
 
             if(count % config.getCommitBatchSize() == 0) {
                 connection.commit();
+
+                log.info("imported {} triples ({}/sec)", count, (config.getCommitBatchSize() * 1000) / (System.currentTimeMillis() - previous));
+                previous = System.currentTimeMillis();
             }
         } catch (SQLException | ExecutionException e) {
             throw new RDFHandlerException(e);

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/091ee0d9/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java
index 2ca0475..6ef7ffd 100644
--- a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java
+++ b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java
@@ -24,6 +24,8 @@ import org.apache.commons.cli.Option;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.cli.PosixParser;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
 import org.apache.commons.configuration.Configuration;
 import org.apache.commons.configuration.ConfigurationException;
 import org.apache.commons.configuration.PropertiesConfiguration;
@@ -44,6 +46,7 @@ import org.openrdf.rio.UnsupportedRDFormatException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.BufferedInputStream;
 import java.io.Console;
 import java.io.File;
 import java.io.FileInputStream;
@@ -146,7 +149,7 @@ public class KiWiLoader {
             String baseUri = endWith("http://localhost/", "/"),
                     context = null, //baseUri+"context/default", 
                     format = null;
-            boolean gzip = false;
+            boolean gzip = false, bzip = false;
 
             String dbCon = null, dbUser = null, dbPasswd = null;
             KiWiDialect dialect = null;
@@ -208,6 +211,7 @@ public class KiWiLoader {
 
             // force uncompressing the files (will try to guess if this option is not set)
             gzip = cmd.hasOption('z');
+            bzip = cmd.hasOption('j');
 
             // the format to use as fallback; will try to guess based on the filename.
             format = cmd.getOptionValue('f');
@@ -253,24 +257,30 @@ public class KiWiLoader {
                     InputStream inStream = new FileInputStream(f);
                     if (gzip || inFile.endsWith(".gz")) {
                         log.debug("{} seems to be gzipped", inFile);
-                        inStream = new GZIPInputStream(inStream);
+                        inStream = new GzipCompressorInputStream(inStream,true);
+                        fName = fName.replaceFirst("\\.gz$", "");
+                    } else if(bzip || inFile.endsWith(".bz2")) {
+                        log.debug("{} seems to be bzip2 compressed", inFile);
+                        inStream = new BZip2CompressorInputStream(inStream,true);
                         fName = fName.replaceFirst("\\.gz$", "");
                     }
 
                     long start = System.currentTimeMillis();
                     try {
-                        loader.load(inStream, RDFFormat.forFileName(fName, fmt));
+                        loader.load(new BufferedInputStream(inStream), RDFFormat.forFileName(fName, fmt));
                     } catch (final UnsupportedRDFormatException | RDFParseException e) {
                         // try again with the fixed format
                         if (fmt != null) {
                             inStream.close();
                             // Reopen new
-                            if (inStream instanceof GZIPInputStream) {
-                                inStream = new GZIPInputStream(new FileInputStream(f));
+                            if (inStream instanceof GzipCompressorInputStream) {
+                                inStream = new GzipCompressorInputStream(new FileInputStream(f),true);
+                            } else if (inStream instanceof BZip2CompressorInputStream) {
+                                inStream = new BZip2CompressorInputStream(new FileInputStream(f),true);
                             } else {
                                 inStream = new FileInputStream(f);
                             }
-                            loader.load(inStream, fmt);
+                            loader.load(new BufferedInputStream(inStream), fmt);
                         } else {
                             throw e;
                         }
@@ -284,7 +294,8 @@ public class KiWiLoader {
                 } catch (IOException e) {
                     log.error("Error while reading {}: {}", inFile, e);
                 } catch (RDFParseException e) {
-                    log.error("file {} contains errors: {}\n{}", inFile, e.getMessage(), e);
+                    log.error("file {} contains errors: {}\n{}", inFile, e.getMessage());
+                    log.error("exception details",e);
                 } catch (UnsupportedRDFormatException e) {
                     log.error("{}, required for {} - dependency missing?", e.getMessage(), inFile);
                 }
@@ -467,6 +478,7 @@ public class KiWiLoader {
         options.addOption(context);
 
         options.addOption("z", false, "Input file is gzip compressed");
+        options.addOption("j", false, "Input file is bzip2 compressed");
 
         final Option format = new Option("f", "format", true, "format of rdf file (if guessing based on the extension does not work)");
         format.setArgName("mime-type");

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/091ee0d9/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/sail/KiWiValueFactory.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/sail/KiWiValueFactory.java b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/sail/KiWiValueFactory.java
index 5f2b502..33acd4c 100644
--- a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/sail/KiWiValueFactory.java
+++ b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/sail/KiWiValueFactory.java
@@ -85,9 +85,6 @@ public class KiWiValueFactory implements ValueFactory {
     private String defaultContext;
 
 
-    private int poolSize = 4;
-    private int poolPosition = 0;
-
     private LoadingCache<String,KiWiUriResource> uriCache;
     private LoadingCache<String,KiWiAnonResource> bnodeCache;
     private LoadingCache<LiteralKey, KiWiLiteral> literalCache;