You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@marmotta.apache.org by ss...@apache.org on 2013/11/06 19:09:58 UTC

git commit: improved KiWi bulk import (MARMOTTA-310, MARMOTTA-245)

Updated Branches:
  refs/heads/develop 757b8db60 -> e7692e68b


improved KiWi bulk import (MARMOTTA-310, MARMOTTA-245)


Project: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/commit/e7692e68
Tree: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/tree/e7692e68
Diff: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/diff/e7692e68

Branch: refs/heads/develop
Commit: e7692e68bdcae9e1d988cbd9cc108ab0631bb402
Parents: 757b8db
Author: Sebastian Schaffert <ss...@apache.org>
Authored: Wed Nov 6 19:09:17 2013 +0100
Committer: Sebastian Schaffert <ss...@apache.org>
Committed: Wed Nov 6 19:09:17 2013 +0100

----------------------------------------------------------------------
 .../marmotta/kiwi/loader/KiWiHandler.java       | 290 ++++++++++++++++++-
 .../apache/marmotta/kiwi/loader/KiWiLoader.java |  89 ++----
 .../kiwi/loader/KiWiLoaderConfiguration.java    |  68 +++++
 .../marmotta/kiwi/loader/KiWiHandlerTest.java   | 107 +++++++
 .../marmotta/kiwi/loader/KiWiLoaderTest.java    |  30 +-
 .../apache/marmotta/kiwi/loader/demo-data.foaf  |  78 +++++
 6 files changed, 581 insertions(+), 81 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/e7692e68/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
index 3a884d6..f89fd35 100644
--- a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
+++ b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
@@ -1,22 +1,90 @@
 package org.apache.marmotta.kiwi.loader;
 
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import org.apache.marmotta.commons.sesame.model.Namespaces;
+import org.apache.marmotta.commons.util.DateUtils;
+import org.apache.marmotta.kiwi.model.rdf.*;
 import org.apache.marmotta.kiwi.persistence.KiWiConnection;
+import org.apache.marmotta.kiwi.sail.KiWiStore;
+import org.openrdf.model.BNode;
+import org.openrdf.model.Literal;
 import org.openrdf.model.Statement;
+import org.openrdf.model.URI;
+import org.openrdf.model.Value;
+import org.openrdf.model.impl.URIImpl;
 import org.openrdf.rio.RDFHandler;
 import org.openrdf.rio.RDFHandlerException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.SQLException;
+import java.util.Date;
+import java.util.IllformedLocaleException;
+import java.util.Locale;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
 
 /**
  * A fast-lane RDF import handler that allows bulk-importing triples into a KiWi triplestore. It directly accesses
- * the database using a KiWiConnection.
+ * the database using a KiWiConnection. Note that certain configuration options will make the import "unsafe"
+ * because they turn off expensive existence checks. If you are not careful and import the same data twice, this
+ * might mean duplicate entries in the database.
  *
  * @author Sebastian Schaffert (sschaffert@apache.org)
  */
 public class KiWiHandler implements RDFHandler {
 
+    private static Logger log = LoggerFactory.getLogger(KiWiHandler.class);
 
     private KiWiConnection connection;
+    private KiWiStore store;
+
+    long count = 0;
+    long start = 0;
+
+    private KiWiLoaderConfiguration config;
+
+    private LoadingCache<Literal, KiWiLiteral> literalCache;
+    private LoadingCache<URI, KiWiUriResource> uriCache;
+    private LoadingCache<BNode, KiWiAnonResource> bnodeCache;
+
+    public KiWiHandler(KiWiStore store, KiWiLoaderConfiguration config) {
+        this.config     = config;
+        this.store      = store;
+
+        this.literalCache = CacheBuilder.newBuilder()
+                .maximumSize(100000)
+                .expireAfterAccess(10, TimeUnit.MINUTES)
+                .build(new CacheLoader<Literal, KiWiLiteral>() {
+                    @Override
+                    public KiWiLiteral load(Literal l) throws Exception {
+                        return createLiteral(l);
+                    }
+                });
+
+        this.uriCache = CacheBuilder.newBuilder()
+                .maximumSize(500000)
+                .expireAfterAccess(10, TimeUnit.MINUTES)
+                .build(new CacheLoader<URI, KiWiUriResource>() {
+                    @Override
+                    public KiWiUriResource load(URI key) throws Exception {
+                        return createURI(key.stringValue());
+                    }
+                });
 
+        this.bnodeCache = CacheBuilder.newBuilder()
+                .maximumSize(10000)
+                .expireAfterAccess(10, TimeUnit.MINUTES)
+                .build(new CacheLoader<BNode, KiWiAnonResource>() {
+                    @Override
+                    public KiWiAnonResource load(BNode key) throws Exception {
+                        return createBNode(key.stringValue());
+                    }
+                });
 
+    }
 
     /**
      * Signals the end of the RDF data. This method is called when all data has
@@ -27,7 +95,14 @@ public class KiWiHandler implements RDFHandler {
      */
     @Override
     public void endRDF() throws RDFHandlerException {
-        //To change body of implemented methods use File | Settings | File Templates.
+        try {
+            connection.commit();
+            connection.close();
+        } catch (SQLException e) {
+            throw new RDFHandlerException(e);
+        }
+
+        log.info("KiWiLoader: RDF bulk import of {} triples finished after {} ms", count, System.currentTimeMillis() - start);
     }
 
     /**
@@ -39,7 +114,14 @@ public class KiWiHandler implements RDFHandler {
      */
     @Override
     public void startRDF() throws RDFHandlerException {
-        //To change body of implemented methods use File | Settings | File Templates.
+        log.info("KiWiLoader: starting RDF bulk import");
+        try {
+            this.connection = store.getPersistence().getConnection();
+        } catch (SQLException e) {
+            throw new RDFHandlerException(e);
+        }
+
+        this.start = System.currentTimeMillis();
     }
 
     /**
@@ -56,7 +138,11 @@ public class KiWiHandler implements RDFHandler {
      */
     @Override
     public void handleNamespace(String prefix, String uri) throws RDFHandlerException {
-        //To change body of implemented methods use File | Settings | File Templates.
+        try {
+            connection.storeNamespace(new KiWiNamespace(prefix,uri));
+        } catch (SQLException e) {
+            throw new RDFHandlerException(e);
+        }
     }
 
     /**
@@ -68,9 +154,202 @@ public class KiWiHandler implements RDFHandler {
      */
     @Override
     public void handleStatement(Statement st) throws RDFHandlerException {
-        //To change body of implemented methods use File | Settings | File Templates.
+        try {
+            KiWiResource subject = (KiWiResource)convertNode(st.getSubject());
+            KiWiUriResource predicate = (KiWiUriResource)convertNode(st.getPredicate());
+            KiWiNode object = convertNode(st.getObject());
+            KiWiResource context;
+
+            if(config.getContext() != null) {
+                context = (KiWiResource)convertNode(new URIImpl(config.getContext()));
+            } else {
+                context = (KiWiResource)convertNode(st.getContext());
+            }
+
+            KiWiTriple result = new KiWiTriple(subject,predicate,object,context);
+            if(config.isStatementExistanceCheck()) {
+                result.setId(connection.getTripleId(subject, predicate, object, context, true));
+            }
+            connection.storeTriple(result);
+
+            count++;
+
+            if(count % config.getCommitBatchSize() == 0) {
+                connection.commit();
+            }
+        } catch (SQLException | ExecutionException e) {
+            throw new RDFHandlerException(e);
+        }
+
+    }
+
+
+    private KiWiNode convertNode(Value value) throws ExecutionException {
+        if(value == null) {
+            return null;
+        } else if(value instanceof KiWiNode) {
+            return (KiWiNode)value;
+        } else if(value instanceof URI) {
+            return uriCache.get((URI)value);
+        } else if(value instanceof BNode) {
+            return bnodeCache.get(((BNode)value));
+        } else if(value instanceof Literal) {
+            Literal l = (Literal)value;
+            return literalCache.get(l);
+        } else {
+            throw new IllegalArgumentException("the value passed as argument does not have the correct type");
+        }
+
+    }
+
+    /**
+     * Convert a Sesame literal into a KiWi literal. A literal that the connection can already load for the same
+     * value is reused; otherwise a new one is created and stored. Typed literals whose lexical value cannot be
+     * parsed for their datatype are kept as xsd:string literals.
+     *
+     * @param l the literal to convert
+     * @return the loaded or newly stored KiWi literal
+     * @throws ExecutionException if the datatype URI cannot be resolved through the URI cache
+     * @throws IllegalStateException if a database error occurs while loading or storing the literal
+     */
+    private KiWiLiteral createLiteral(Literal l) throws ExecutionException {
+        String value = l.getLabel();
+        String lang  = l.getLanguage();
+        URI    type  = l.getDatatype();
+
+        Locale locale;
+        if(lang != null) {
+            try {
+                Locale.Builder builder = new Locale.Builder();
+                builder.setLanguageTag(lang);
+                locale = builder.build();
+            } catch (IllformedLocaleException ex) {
+                log.warn("malformed language literal (language: {})", lang);
+                locale = null;
+                lang = null;
+            }
+        } else {
+            locale = null;
+        }
+
+        KiWiLiteral result;
+        final KiWiUriResource rtype = type==null ? null : uriCache.get(type);
+        // an org.openrdf.model.URI never equals a String, so datatypes are compared by their string value
+        final String datatype = type==null ? null : type.stringValue();
+
+        try {
+            try {
+                // differentiate between the different types of the value
+                if (datatype == null) {
+                    // FIXME: MARMOTTA-39 (this is to avoid a NullPointerException in the following if-clauses)
+                    result = connection.loadLiteral(value, lang, rtype);
+                    if(result == null) {
+                        result = new KiWiStringLiteral(value, locale, rtype);
+                    }
+                } else if(datatype.equals(Namespaces.NS_XSD+"dateTime")) {
+                    // parse if necessary
+                    final Date dvalue = DateUtils.parseDate(value);
+                    if(dvalue == null) {
+                        // NOTE(review): guards against parseDate signalling failure with null -- confirm its contract
+                        throw new IllegalArgumentException("not a valid date: " + value);
+                    }
+                    result = connection.loadLiteral(dvalue);
+                    if(result == null) {
+                        result= new KiWiDateLiteral(dvalue, rtype);
+                    }
+                } else if(datatype.equals(Namespaces.NS_XSD+"integer") || datatype.equals(Namespaces.NS_XSD+"long")) {
+                    long ivalue = Long.parseLong(value);
+                    result = connection.loadLiteral(ivalue);
+                    if(result == null) {
+                        result= new KiWiIntLiteral(ivalue, rtype);
+                    }
+                } else if(datatype.equals(Namespaces.NS_XSD+"double") || datatype.equals(Namespaces.NS_XSD+"float")) {
+                    double dvalue = Double.parseDouble(value);
+                    result = connection.loadLiteral(dvalue);
+                    if(result == null) {
+                        result= new KiWiDoubleLiteral(dvalue, rtype);
+                    }
+                } else if(datatype.equals(Namespaces.NS_XSD+"boolean")) {
+                    boolean bvalue = Boolean.parseBoolean(value);
+                    result = connection.loadLiteral(bvalue);
+                    if(result == null) {
+                        result= new KiWiBooleanLiteral(bvalue, rtype);
+                    }
+                } else {
+                    result = connection.loadLiteral(value, lang, rtype);
+                    if(result == null) {
+                        result = new KiWiStringLiteral(value, locale, rtype);
+                    }
+                }
+            } catch(IllegalArgumentException ex) {
+                // malformed number or date
+                log.warn("malformed argument for typed literal of type {}: {}", datatype, value);
+                KiWiUriResource mytype = createURI(Namespaces.NS_XSD+"string");
+
+                result = connection.loadLiteral(value, lang, mytype);
+                if(result == null) {
+                    result = new KiWiStringLiteral(value, locale, mytype);
+                }
+            }
+
+            // store the literal unless it already carries a database ID
+            if(result.getId() == null) {
+                connection.storeNode(result, false);
+            }
+
+            return result;
+        } catch (SQLException e) {
+            log.error("database error, could not load literal",e);
+            throw new IllegalStateException("database error, could not load literal",e);
+        }
+    }
+
+    private KiWiUriResource createURI(String uri) {
+        try {
+            // first look in the registry for newly created resources if the resource has already been created and
+            // is still volatile
+            KiWiUriResource result = connection.loadUriResource(uri);
+
+            if(result == null) {
+                result = new KiWiUriResource(uri);
+
+                connection.storeNode(result, false);
+
+            }
+            if(result.getId() == null) {
+                log.error("node ID is null!");
+            }
+
+            return result;
+        } catch (SQLException e) {
+            log.error("database error, could not load URI resource",e);
+            throw new IllegalStateException("database error, could not load URI resource",e);
+        }
     }
 
+    private KiWiAnonResource createBNode(String nodeID) {
+        try {
+            // first look in the registry for newly created resources if the resource has already been created and
+            // is still volatile
+            KiWiAnonResource result = connection.loadAnonResource(nodeID);
+
+            if(result == null) {
+                result = new KiWiAnonResource(nodeID);
+                connection.storeNode(result, false);
+            }
+            if(result.getId() == null) {
+                log.error("node ID is null!");
+            }
+
+            return result;
+        } catch (SQLException e) {
+            log.error("database error, could not load anonymous resource",e);
+            throw new IllegalStateException("database error, could not load anonymous resource",e);
+        }
+    }
+
+
+
     /**
      * Handles a comment.
      *
@@ -80,6 +359,5 @@ public class KiWiHandler implements RDFHandler {
      */
     @Override
     public void handleComment(String comment) throws RDFHandlerException {
-        //To change body of implemented methods use File | Settings | File Templates.
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/e7692e68/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java
index 652e3e5..2ca0475 100644
--- a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java
+++ b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoader.java
@@ -17,17 +17,6 @@
  */
 package org.apache.marmotta.kiwi.loader;
 
-import java.io.Console;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.regex.Pattern;
-import java.util.zip.GZIPInputStream;
-
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.CommandLineParser;
 import org.apache.commons.cli.HelpFormatter;
@@ -44,19 +33,28 @@ import org.apache.marmotta.kiwi.persistence.h2.H2Dialect;
 import org.apache.marmotta.kiwi.persistence.mysql.MySQLDialect;
 import org.apache.marmotta.kiwi.persistence.pgsql.PostgreSQLDialect;
 import org.apache.marmotta.kiwi.sail.KiWiStore;
-import org.apache.marmotta.kiwi.transactions.api.TransactionalSail;
-import org.apache.marmotta.kiwi.transactions.sail.KiWiTransactionalSail;
-import org.openrdf.model.Resource;
 import org.openrdf.repository.RepositoryException;
 import org.openrdf.repository.sail.SailRepository;
-import org.openrdf.repository.sail.SailRepositoryConnection;
 import org.openrdf.rio.RDFFormat;
+import org.openrdf.rio.RDFHandlerException;
 import org.openrdf.rio.RDFParseException;
+import org.openrdf.rio.RDFParser;
+import org.openrdf.rio.Rio;
 import org.openrdf.rio.UnsupportedRDFormatException;
-import org.openrdf.sail.Sail;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.Console;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.regex.Pattern;
+import java.util.zip.GZIPInputStream;
+
 /**
  * {@link KiWiLoader} is a fastpath importer into a kiwi database. It is meant
  * for importing large datafiles into a kiwi RDF store, avoiding the overhead of
@@ -117,6 +115,8 @@ public class KiWiLoader {
     protected String context;
     protected boolean isVersioningEnabled;
     protected boolean isReasoningEnabled;
+
+    protected KiWiStore store;
     protected SailRepository repository;
 
     public KiWiLoader(KiWiConfiguration kiwi, String baseUri, String context) {
@@ -320,28 +320,19 @@ public class KiWiLoader {
      */
     public void load(InputStream inStream, RDFFormat forFileName) throws RDFParseException, IOException {
         try {
-            final SailRepositoryConnection con = repository.getConnection();
-            try {
-                con.begin();
-
-                final Resource[] ctx;
-                if (context != null) {
-                    ctx = new Resource[] { con.getValueFactory().createURI(context) };
-                } else {
-                    ctx = new Resource[] {};
-                }
+            KiWiLoaderConfiguration config = new KiWiLoaderConfiguration();
+            if (context != null) {
+                config.setContext(context);
+            }
 
-                con.add(inStream, baseUri, forFileName, ctx);
+            KiWiHandler handler = new KiWiHandler(store,config);
 
-                con.commit();
-            } catch (final Throwable t) {
-                con.rollback();
-                throw t;
-            } finally {
-                con.close();
-            }
-        } catch (RepositoryException re) {
-            log.error("RepositoryException: {}", re.getMessage());
+            RDFParser parser = Rio.createParser(forFileName);
+            parser.setRDFHandler(handler);
+            parser.parse(inStream,baseUri);
+
+        } catch (RDFHandlerException e) {
+            log.error("RepositoryException: {}", e.getMessage());
         }
     }
 
@@ -400,31 +391,9 @@ public class KiWiLoader {
             throw new IllegalStateException("repository already initialized");
         }
         log.debug("initializing kiwi-store: {}", kiwi);
-        KiWiStore store = new KiWiStore(kiwi);
-
-        final Sail sail;
-        if (isVersioningEnabled || isReasoningEnabled) {
-            TransactionalSail tSail = new KiWiTransactionalSail(store);
-            if (isVersioningEnabled) {
-                log.debug("enabling versioning...");
-                // TODO: Add Versioning
-                // tSail = new KiWiVersioningSail(tSail);
-                log.warn("versioning not yet supported/implemented");
-            }
-
-            if (isReasoningEnabled) {
-                log.debug("enabling reasoner...");
-                // TODO: Add Reasoning
-                // tSail = new KiWiReasoningSail(tSail, null);
-                log.warn("reasoning not yet supported/implemented");
-            }
-            sail = tSail;
-        } else {
-            // no transactional sail required here
-            sail = store;
-        }
+        store = new KiWiStore(kiwi);
 
-        repository = new SailRepository(sail);
+        repository = new SailRepository(store);
         repository.initialize();
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/e7692e68/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoaderConfiguration.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoaderConfiguration.java b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoaderConfiguration.java
new file mode 100644
index 0000000..576fd40
--- /dev/null
+++ b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiLoaderConfiguration.java
@@ -0,0 +1,68 @@
+package org.apache.marmotta.kiwi.loader;
+
+/**
+ * Configuration options for the KiWiLoader
+ *
+ * @author Sebastian Schaffert (sschaffert@apache.org)
+ */
+public class KiWiLoaderConfiguration {
+
+    /**
+     * the size of a batch insert into the database; only when this number of statements has been processed will
+     * an insert statement to the database be issued.
+     */
+
+    int statementBatchSize = 1000;
+
+    /**
+     * the size of a database transaction; the database transaction will commit after this number of statements
+     */
+    int commitBatchSize = 10000;
+
+    /**
+     * If true, the importer will check if a statement already exists; this check is necessary to ensure consistency
+     * of the database, but it is also very expensive, because every triple needs to be checked. Set this option to
+     * false in case you are sure that every imported triple does not yet exist in the database.
+     */
+    boolean statementExistanceCheck = false;
+
+    /**
+     * Import into this context, ignoring context provided by the statements
+     */
+    String context;
+
+    public KiWiLoaderConfiguration() {
+    }
+
+    public int getCommitBatchSize() {
+        return commitBatchSize;
+    }
+
+    public void setCommitBatchSize(int commitBatchSize) {
+        this.commitBatchSize = commitBatchSize;
+    }
+
+    public int getStatementBatchSize() {
+        return statementBatchSize;
+    }
+
+    public void setStatementBatchSize(int statementBatchSize) {
+        this.statementBatchSize = statementBatchSize;
+    }
+
+    public boolean isStatementExistanceCheck() {
+        return statementExistanceCheck;
+    }
+
+    public void setStatementExistanceCheck(boolean statementExistanceCheck) {
+        this.statementExistanceCheck = statementExistanceCheck;
+    }
+
+    public String getContext() {
+        return context;
+    }
+
+    public void setContext(String context) {
+        this.context = context;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/e7692e68/libraries/kiwi/kiwi-loader/src/test/java/org/apache/marmotta/kiwi/loader/KiWiHandlerTest.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-loader/src/test/java/org/apache/marmotta/kiwi/loader/KiWiHandlerTest.java b/libraries/kiwi/kiwi-loader/src/test/java/org/apache/marmotta/kiwi/loader/KiWiHandlerTest.java
new file mode 100644
index 0000000..59509a6
--- /dev/null
+++ b/libraries/kiwi/kiwi-loader/src/test/java/org/apache/marmotta/kiwi/loader/KiWiHandlerTest.java
@@ -0,0 +1,107 @@
+package org.apache.marmotta.kiwi.loader;
+
+import org.apache.marmotta.kiwi.config.KiWiConfiguration;
+import org.apache.marmotta.kiwi.sail.KiWiStore;
+import org.apache.marmotta.kiwi.test.junit.KiWiDatabaseRunner;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestWatcher;
+import org.junit.runner.Description;
+import org.junit.runner.RunWith;
+import org.openrdf.repository.Repository;
+import org.openrdf.repository.RepositoryConnection;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.repository.sail.SailRepository;
+import org.openrdf.rio.RDFFormat;
+import org.openrdf.rio.RDFParseException;
+import org.openrdf.rio.RDFParser;
+import org.openrdf.rio.Rio;
+import org.openrdf.sail.SailException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.sql.SQLException;
+
+/**
+ * Tests the bulk import of RDF data into a KiWi store through {@link KiWiHandler}.
+ *
+ * @author Sebastian Schaffert (sschaffert@apache.org)
+ */
+@RunWith(KiWiDatabaseRunner.class)
+public class KiWiHandlerTest {
+
+    private KiWiStore store;
+    private Repository repository;
+
+    private final KiWiConfiguration dbConfig;
+
+    public KiWiHandlerTest(KiWiConfiguration dbConfig) {
+        this.dbConfig = dbConfig;
+        dbConfig.setFulltextEnabled(true);
+        dbConfig.setFulltextLanguages(new String[] {"en"});
+    }
+
+
+    @Before
+    public void initDatabase() throws RepositoryException, IOException, RDFParseException, SailException {
+        store = new KiWiStore(dbConfig);
+        repository = new SailRepository(store);
+        repository.initialize();
+    }
+
+    @After
+    public void dropDatabase() throws RepositoryException, SQLException, SailException {
+        store.getPersistence().dropDatabase();
+        repository.shutDown();
+    }
+
+    final Logger logger =
+            LoggerFactory.getLogger(this.getClass());
+
+    @Rule
+    public TestWatcher watchman = new TestWatcher() {
+        /**
+         * Invoked when a test is about to start
+         */
+        @Override
+        protected void starting(Description description) {
+            logger.info("{} being run...", description.getMethodName());
+        }
+    };
+
+    @Test
+    public void testImport() throws Exception {
+
+        // bulk import
+        long start = System.currentTimeMillis();
+        RDFParser parser = Rio.createParser(RDFFormat.RDFXML);
+        parser.setRDFHandler(new KiWiHandler(store, new KiWiLoaderConfiguration()));
+        parser.parse(this.getClass().getResourceAsStream("demo-data.foaf"),"");
+
+        logger.info("bulk import in {} ms", System.currentTimeMillis() - start);
+
+        // check presence of data
+        try {
+            RepositoryConnection con = repository.getConnection();
+            try {
+                con.begin();
+
+                Assert.assertTrue(con.hasStatement(null,null,null,true));
+
+                con.commit();
+            } catch(RepositoryException ex) {
+                con.rollback();
+            } finally {
+                con.close();
+            }
+        } catch(RepositoryException ex) {
+            ex.printStackTrace(); // TODO: handle error
+        }
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/e7692e68/libraries/kiwi/kiwi-loader/src/test/java/org/apache/marmotta/kiwi/loader/KiWiLoaderTest.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-loader/src/test/java/org/apache/marmotta/kiwi/loader/KiWiLoaderTest.java b/libraries/kiwi/kiwi-loader/src/test/java/org/apache/marmotta/kiwi/loader/KiWiLoaderTest.java
index 3f71727..86c31f1 100644
--- a/libraries/kiwi/kiwi-loader/src/test/java/org/apache/marmotta/kiwi/loader/KiWiLoaderTest.java
+++ b/libraries/kiwi/kiwi-loader/src/test/java/org/apache/marmotta/kiwi/loader/KiWiLoaderTest.java
@@ -1,17 +1,5 @@
 package org.apache.marmotta.kiwi.loader;
 
-import static org.hamcrest.CoreMatchers.containsString;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.Properties;
-import java.util.zip.GZIPOutputStream;
-
 import org.apache.commons.io.IOUtils;
 import org.apache.marmotta.kiwi.config.KiWiConfiguration;
 import org.apache.marmotta.kiwi.persistence.h2.H2Dialect;
@@ -31,6 +19,18 @@ import org.openrdf.repository.RepositoryException;
 import org.openrdf.rio.RDFFormat;
 import org.openrdf.rio.RDFParseException;
 
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Properties;
+import java.util.zip.GZIPOutputStream;
+
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
 public class KiWiLoaderTest {
 
     @Rule
@@ -107,7 +107,7 @@ public class KiWiLoaderTest {
     public void testLoadFile() throws RepositoryException, RDFParseException,
             IOException {
         KiWiTestLoader loader = new KiWiTestLoader(getKiWiConfig(),
-                "htto://example.com/test/", null);
+                "http://example.com/test/", null);
         loader.initialize();
 
         loader.load(dataFile.getAbsolutePath(), RDFFormat.RDFXML, false);
@@ -137,7 +137,7 @@ public class KiWiLoaderTest {
         os.close();
         
         KiWiTestLoader loader = new KiWiTestLoader(getKiWiConfig(),
-                "htto://example.com/test/", null);
+                "http://example.com/test/", null);
         loader.initialize();
 
         loader.load(gz.getAbsolutePath(), RDFFormat.RDFXML, true);
@@ -177,7 +177,7 @@ public class KiWiLoaderTest {
     @Test
     public void testLoadInputStream() throws RepositoryException, RDFParseException, IOException {
         KiWiTestLoader loader = new KiWiTestLoader(getKiWiConfig(),
-                "htto://example.com/test/", null);
+                "http://example.com/test/", null);
         loader.initialize();
 
         loader.load(new FileInputStream(dataFile), RDFFormat.RDFXML);

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/e7692e68/libraries/kiwi/kiwi-loader/src/test/resources/org/apache/marmotta/kiwi/loader/demo-data.foaf
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-loader/src/test/resources/org/apache/marmotta/kiwi/loader/demo-data.foaf b/libraries/kiwi/kiwi-loader/src/test/resources/org/apache/marmotta/kiwi/loader/demo-data.foaf
new file mode 100644
index 0000000..219c341
--- /dev/null
+++ b/libraries/kiwi/kiwi-loader/src/test/resources/org/apache/marmotta/kiwi/loader/demo-data.foaf
@@ -0,0 +1,78 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~      http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<rdf:RDF
+        xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+        xmlns:foaf="http://xmlns.com/foaf/0.1/"
+        xmlns:dc="http://purl.org/dc/elements/1.1/">
+
+    <foaf:Person rdf:about="http://localhost:8080/LMF/resource/hans_meier" xmlns:foaf="http://xmlns.com/foaf/0.1/">
+        <foaf:name>Hans Meier</foaf:name>
+        <dc:description xml:lang="en">Hans Meier is a software engineer living in Salzburg</dc:description>
+        <dc:description xml:lang="de">Hans Meier ist ein Softwareentwickler aus Salzburg</dc:description>
+        <foaf:interest rdf:resource="http://rdf.freebase.com/ns/en.software_engineering"/>
+        <foaf:interest rdf:resource="http://rdf.freebase.com/ns/en.linux"/>
+        <foaf:interest rdf:resource="http://dbpedia.org/resource/Java" />
+        <foaf:interest rdf:resource="http://dbpedia.org/resource/Climbing"/>
+        <foaf:based_near rdf:resource="http://sws.geonames.org/2766824/"/>
+        <foaf:depiction rdf:resource="http://localhost:8080/LMF/resource/hans_meier.jpg"/>
+
+        <foaf:age rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">29</foaf:age>
+
+        <foaf:knows rdf:resource="http://localhost:8080/LMF/resource/sepp_huber" />
+        <foaf:knows rdf:resource="http://localhost:8080/LMF/resource/anna_schmidt"/>
+
+        <foaf:account>
+            <foaf:OnlineAccount>
+                <foaf:accountName>Example</foaf:accountName>
+                <foaf:accountServiceHomepage>http://www.example.com</foaf:accountServiceHomepage>
+            </foaf:OnlineAccount>
+        </foaf:account>
+    </foaf:Person>
+
+    <foaf:Person rdf:about="http://localhost:8080/LMF/resource/sepp_huber" xmlns:foaf="http://xmlns.com/foaf/0.1/">
+        <foaf:name>Sepp Huber</foaf:name>
+        <dc:description xml:lang="en">Sepp Huber is an alpinist living in Traunstein. He is a good climber, but not as famous as his cousin Alexander Huber.</dc:description>
+        <dc:description xml:lang="de-DE">Sepp Huber ist ein Bergsteiger aus Traunstein. Er ist ein guter Kletterer.</dc:description>
+        <foaf:interest rdf:resource="http://dbpedia.org/resource/Mountaineering"/>
+        <foaf:interest rdf:resource="http://dbpedia.org/resource/Climbing"/>
+        <foaf:interest rdf:resource="http://localhost:8080/LMF/resource/Chess" />
+        <foaf:based_near rdf:resource="http://dbpedia.org/resource/Traunstein"/>
+
+        <foaf:age rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">31</foaf:age>
+
+
+        <foaf:knows rdf:resource="http://dbpedia.org/resource/Alexander_Huber" />
+        <foaf:knows rdf:resource="http://localhost:8080/LMF/resource/hans_meier" />
+    </foaf:Person>
+
+    <foaf:Person rdf:about="http://localhost:8080/LMF/resource/anna_schmidt" xmlns:foaf="http://xmlns.com/foaf/0.1/">
+        <foaf:name>Anna Schmidt</foaf:name>
+        <dc:description xml:lang="en">Anna Schmidt is working as PR manager for mountaineers coming from Garmisch-Partenkirchen. She likes mountaineering and is also a Linux enthusiast.</dc:description>
+        <foaf:interest>Literal Interest</foaf:interest>
+        <foaf:interest rdf:resource="http://dbpedia.org/resource/Mountaineering"/>
+        <foaf:interest rdf:resource="http://dbpedia.org/resource/Linux"/>
+        <foaf:interest rdf:resource="http://localhost:8080/LMF/resource/Chess" />
+        <foaf:based_near rdf:resource="http://dbpedia.org/resource/Garmisch-Partenkirchen"/>
+        <foaf:depiction rdf:resource="http://localhost:8080/LMF/resource/anna_schmidt.jpg"/>
+
+        <foaf:knows rdf:resource="http://dbpedia.org/resource/Alexander_Huber" />
+        <foaf:knows rdf:resource="http://localhost:8080/LMF/resource/sepp_huber" />
+    </foaf:Person>
+
+
+</rdf:RDF>