You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@marmotta.apache.org by ss...@apache.org on 2013/11/06 22:04:13 UTC
git commit: add support for bzip2 to KiWiLoader
Updated Branches:
refs/heads/develop a91b05893 -> 091ee0d9f
add support for bzip2 to KiWiLoader
Project: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/commit/091ee0d9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/tree/091ee0d9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/diff/091ee0d9
Branch: refs/heads/develop
Commit: 091ee0d9feaec667a0eea50008acbbe1ccf9d6bc
Parents: a91b058
Author: Sebastian Schaffert <ss...@apache.org>
Authored: Wed Nov 6 22:04:07 2013 +0100
Committer: Sebastian Schaffert <ss...@apache.org>
Committed: Wed Nov 6 22:04:07 2013 +0100
----------------------------------------------------------------------
libraries/kiwi/kiwi-loader/pom.xml | 39 +++++++++++---------
.../marmotta/kiwi/loader/KiWiHandler.java | 5 +++
.../apache/marmotta/kiwi/loader/KiWiLoader.java | 26 +++++++++----
.../marmotta/kiwi/sail/KiWiValueFactory.java | 3 --
4 files changed, 46 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/091ee0d9/libraries/kiwi/kiwi-loader/pom.xml
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-loader/pom.xml b/libraries/kiwi/kiwi-loader/pom.xml
index c35d6e6..59e225a 100644
--- a/libraries/kiwi/kiwi-loader/pom.xml
+++ b/libraries/kiwi/kiwi-loader/pom.xml
@@ -69,6 +69,12 @@
</build>
<dependencies>
+
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ <version>1.4.1</version>
+ </dependency>
<dependency>
<groupId>org.apache.marmotta</groupId>
<artifactId>kiwi-triplestore</artifactId>
@@ -83,6 +89,20 @@
</dependency>
<dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jcl-over-slf4j</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>log4j-over-slf4j</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+
+
+ <dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
</dependency>
@@ -106,11 +126,6 @@
<artifactId>commons-configuration</artifactId>
</dependency>
<dependency>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- <version>1.1.3</version>
- </dependency>
- <dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</dependency>
@@ -127,11 +142,6 @@
</dependency>
<dependency>
<groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-rio-ntriples</artifactId>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.openrdf.sesame</groupId>
<artifactId>sesame-rio-n3</artifactId>
<scope>runtime</scope>
</dependency>
@@ -155,11 +165,6 @@
<artifactId>sesame-rio-trig</artifactId>
<scope>runtime</scope>
</dependency>
- <dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-rio-trig</artifactId>
- <scope>runtime</scope>
- </dependency>
<dependency>
<groupId>org.apache.marmotta</groupId>
@@ -168,8 +173,8 @@
<type>test-jar</type>
</dependency>
<dependency>
- <!-- License: CPL-1.0 http://www.opensource.org/licenses/cpl1.0
- As a MAVEN dependency it is not included in a source release. As a TEST dependency
+ <!-- License: CPL-1.0 http://www.opensource.org/licenses/cpl1.0
+ As a MAVEN dependency it is not included in a source release. As a TEST dependency
it is not included in a BINARY release, so no need to mention in N&L. -->
<groupId>com.github.stefanbirkner</groupId>
<artifactId>system-rules</artifactId>
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/091ee0d9/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
index f89fd35..c2ecc7a 100644
--- a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
+++ b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
@@ -43,6 +43,7 @@ public class KiWiHandler implements RDFHandler {
long count = 0;
long start = 0;
+ long previous = 0;
private KiWiLoaderConfiguration config;
@@ -122,6 +123,7 @@ public class KiWiHandler implements RDFHandler {
}
this.start = System.currentTimeMillis();
+ this.previous = System.currentTimeMillis();
}
/**
@@ -176,6 +178,9 @@ public class KiWiHandler implements RDFHandler {
if(count % config.getCommitBatchSize() == 0) {
connection.commit();
+
+ log.info("imported {} triples ({}/sec)", count, (config.getCommitBatchSize() * 1000) / (System.currentTimeMillis() - previous));
+ previous = System.currentTimeMillis();
}
} catch (SQLException | ExecutionException e) {
throw new RDFHandlerException(e);
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/091ee0d9/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java
index 2ca0475..6ef7ffd 100644
--- a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java
+++ b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java
@@ -24,6 +24,8 @@ import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
@@ -44,6 +46,7 @@ import org.openrdf.rio.UnsupportedRDFormatException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.BufferedInputStream;
import java.io.Console;
import java.io.File;
import java.io.FileInputStream;
@@ -146,7 +149,7 @@ public class KiWiLoader {
String baseUri = endWith("http://localhost/", "/"),
context = null, //baseUri+"context/default",
format = null;
- boolean gzip = false;
+ boolean gzip = false, bzip = false;
String dbCon = null, dbUser = null, dbPasswd = null;
KiWiDialect dialect = null;
@@ -208,6 +211,7 @@ public class KiWiLoader {
// force uncompressing the files (will try to guess if this option is not set)
gzip = cmd.hasOption('z');
+ bzip = cmd.hasOption('j');
// the format to use as fallback; will try to guess based on the filename.
format = cmd.getOptionValue('f');
@@ -253,24 +257,30 @@ public class KiWiLoader {
InputStream inStream = new FileInputStream(f);
if (gzip || inFile.endsWith(".gz")) {
log.debug("{} seems to be gzipped", inFile);
- inStream = new GZIPInputStream(inStream);
+ inStream = new GzipCompressorInputStream(inStream,true);
+ fName = fName.replaceFirst("\\.gz$", "");
+ } else if(bzip || inFile.endsWith(".bz2")) {
+ log.debug("{} seems to be bzip2 compressed", inFile);
+ inStream = new BZip2CompressorInputStream(inStream,true);
fName = fName.replaceFirst("\\.gz$", "");
}
long start = System.currentTimeMillis();
try {
- loader.load(inStream, RDFFormat.forFileName(fName, fmt));
+ loader.load(new BufferedInputStream(inStream), RDFFormat.forFileName(fName, fmt));
} catch (final UnsupportedRDFormatException | RDFParseException e) {
// try again with the fixed format
if (fmt != null) {
inStream.close();
// Reopen new
- if (inStream instanceof GZIPInputStream) {
- inStream = new GZIPInputStream(new FileInputStream(f));
+ if (inStream instanceof GzipCompressorInputStream) {
+ inStream = new GzipCompressorInputStream(new FileInputStream(f),true);
+ } else if (inStream instanceof BZip2CompressorInputStream) {
+ inStream = new BZip2CompressorInputStream(new FileInputStream(f),true);
} else {
inStream = new FileInputStream(f);
}
- loader.load(inStream, fmt);
+ loader.load(new BufferedInputStream(inStream), fmt);
} else {
throw e;
}
@@ -284,7 +294,8 @@ public class KiWiLoader {
} catch (IOException e) {
log.error("Error while reading {}: {}", inFile, e);
} catch (RDFParseException e) {
- log.error("file {} contains errors: {}\n{}", inFile, e.getMessage(), e);
+ log.error("file {} contains errors: {}\n{}", inFile, e.getMessage());
+ log.error("exception details",e);
} catch (UnsupportedRDFormatException e) {
log.error("{}, required for {} - dependency missing?", e.getMessage(), inFile);
}
@@ -467,6 +478,7 @@ public class KiWiLoader {
options.addOption(context);
options.addOption("z", false, "Input file is gzip compressed");
+ options.addOption("j", false, "Input file is bzip2 compressed");
final Option format = new Option("f", "format", true, "format of rdf file (if guessing based on the extension does not work)");
format.setArgName("mime-type");
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/091ee0d9/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/sail/KiWiValueFactory.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/sail/KiWiValueFactory.java b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/sail/KiWiValueFactory.java
index 5f2b502..33acd4c 100644
--- a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/sail/KiWiValueFactory.java
+++ b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/sail/KiWiValueFactory.java
@@ -85,9 +85,6 @@ public class KiWiValueFactory implements ValueFactory {
private String defaultContext;
- private int poolSize = 4;
- private int poolPosition = 0;
-
private LoadingCache<String,KiWiUriResource> uriCache;
private LoadingCache<String,KiWiAnonResource> bnodeCache;
private LoadingCache<LiteralKey, KiWiLiteral> literalCache;