You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@marmotta.apache.org by wi...@apache.org on 2016/05/02 11:49:54 UTC
[17/50] [abbrv] marmotta git commit: [MARMOTTA-621] implement a
loader backend for Ostrich,
experiment with DBPedia gives around 45k triples/sec
[MARMOTTA-621] implement a loader backend for Ostrich, experiment with DBPedia gives around 45k triples/sec
Project: http://git-wip-us.apache.org/repos/asf/marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/marmotta/commit/8f60cf64
Tree: http://git-wip-us.apache.org/repos/asf/marmotta/tree/8f60cf64
Diff: http://git-wip-us.apache.org/repos/asf/marmotta/diff/8f60cf64
Branch: refs/heads/MARMOTTA-584
Commit: 8f60cf64dc8b8e99d89dbf4e54fa81af3635606f
Parents: 4eb074d
Author: Sebastian Schaffert <ss...@apache.org>
Authored: Sun Dec 20 12:40:52 2015 +0100
Committer: Sebastian Schaffert <ss...@apache.org>
Committed: Sun Dec 20 12:40:52 2015 +0100
----------------------------------------------------------------------
libraries/ostrich/backend/CMakeLists.txt | 2 +-
.../ostrich/sail/OstrichSailConnection.java | 14 +-
loader/marmotta-loader-ostrich/pom.xml | 83 ++++++++++
.../loader/ostrich/OstrichLoaderBackend.java | 100 ++++++++++++
.../loader/ostrich/OstrichLoaderHandler.java | 156 +++++++++++++++++++
...org.apache.marmotta.loader.api.LoaderBackend | 18 +++
.../src/main/resources/logback.xml | 32 ++++
loader/pom.xml | 10 ++
8 files changed, 407 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/marmotta/blob/8f60cf64/libraries/ostrich/backend/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/libraries/ostrich/backend/CMakeLists.txt b/libraries/ostrich/backend/CMakeLists.txt
index 5a8f110..61156a5 100644
--- a/libraries/ostrich/backend/CMakeLists.txt
+++ b/libraries/ostrich/backend/CMakeLists.txt
@@ -17,7 +17,7 @@ find_package (GLog REQUIRED)
find_package (Boost 1.54.0 COMPONENTS iostreams filesystem system)
find_package (Tcmalloc)
-#add_definitions(-DNDEBUG)
+add_definitions(-DNDEBUG)
if (Boost_IOSTREAMS_FOUND)
message(STATUS "Enabling gzip/bzip2 support (Boost iostreams found)")
http://git-wip-us.apache.org/repos/asf/marmotta/blob/8f60cf64/libraries/ostrich/client/src/main/java/org/apache/marmotta/ostrich/sail/OstrichSailConnection.java
----------------------------------------------------------------------
diff --git a/libraries/ostrich/client/src/main/java/org/apache/marmotta/ostrich/sail/OstrichSailConnection.java b/libraries/ostrich/client/src/main/java/org/apache/marmotta/ostrich/sail/OstrichSailConnection.java
index 93e21ac..e85fdd2 100644
--- a/libraries/ostrich/client/src/main/java/org/apache/marmotta/ostrich/sail/OstrichSailConnection.java
+++ b/libraries/ostrich/client/src/main/java/org/apache/marmotta/ostrich/sail/OstrichSailConnection.java
@@ -107,7 +107,7 @@ public class OstrichSailConnection extends NotifyingSailConnectionBase {
@Override
protected void addStatementInternal(Resource subj, URI pred, Value obj, Resource... contexts) throws SailException {
- log.info("Adding statements.");
+ log.debug("Adding statements.");
ensureTransaction();
if (contexts.length > 0) {
@@ -319,7 +319,7 @@ public class OstrichSailConnection extends NotifyingSailConnectionBase {
@Override
protected void removeStatementsInternal(Resource subj, URI pred, Value obj, Resource... contexts) throws SailException {
- log.info("Removing statements.");
+ log.debug("Removing statements.");
commitForQuery();
ensureTransaction();
@@ -338,7 +338,7 @@ public class OstrichSailConnection extends NotifyingSailConnectionBase {
@Override
protected void clearInternal(Resource... contexts) throws SailException {
- log.info("Clearing statements.");
+ log.debug("Clearing statements.");
commitForQuery();
ensureTransaction();
@@ -357,7 +357,7 @@ public class OstrichSailConnection extends NotifyingSailConnectionBase {
@Override
protected CloseableIteration<? extends Namespace, SailException> getNamespacesInternal() throws SailException {
- log.info("Getting namespaces.");
+ log.debug("Getting namespaces.");
commitForQuery();
Empty pattern = Empty.getDefaultInstance();
@@ -382,7 +382,7 @@ public class OstrichSailConnection extends NotifyingSailConnectionBase {
@Override
protected void setNamespaceInternal(String prefix, String name) throws SailException {
- log.info("Setting namespace {} = {}.", prefix, name);
+ log.debug("Setting namespace {} = {}.", prefix, name);
ensureTransaction();
ProtoNamespace ns = new ProtoNamespace(prefix, name);
@@ -393,7 +393,7 @@ public class OstrichSailConnection extends NotifyingSailConnectionBase {
@Override
protected void removeNamespaceInternal(String prefix) throws SailException {
- log.info("Removing namespace {}.", prefix);
+ log.debug("Removing namespace {}.", prefix);
commitForQuery();
ensureTransaction();
@@ -404,7 +404,7 @@ public class OstrichSailConnection extends NotifyingSailConnectionBase {
@Override
protected void clearNamespacesInternal() throws SailException {
- log.info("Clearing namespaces.");
+ log.debug("Clearing namespaces.");
commitForQuery();
ensureTransaction();
http://git-wip-us.apache.org/repos/asf/marmotta/blob/8f60cf64/loader/marmotta-loader-ostrich/pom.xml
----------------------------------------------------------------------
diff --git a/loader/marmotta-loader-ostrich/pom.xml b/loader/marmotta-loader-ostrich/pom.xml
new file mode 100644
index 0000000..5816b68
--- /dev/null
+++ b/loader/marmotta-loader-ostrich/pom.xml
@@ -0,0 +1,83 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.marmotta</groupId>
+ <artifactId>marmotta-parent</artifactId>
+ <version>3.4.0-SNAPSHOT</version>
+ <relativePath>../../parent</relativePath>
+ </parent>
+
+ <artifactId>marmotta-loader-ostrich</artifactId>
+ <name>Loader: Ostrich Backend</name>
+
+ <description>
+ Apache Marmotta bulk loader backend for loading large datasets into an Ostrich/LevelDB backend.
+ </description>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>2.2</version>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <createDependencyReducedPom>false</createDependencyReducedPom>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer" />
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+ <mainClass>org.apache.marmotta.loader.core.MarmottaLoader</mainClass>
+ </transformer>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
+ <addHeader>false</addHeader>
+ </transformer>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.marmotta</groupId>
+ <artifactId>marmotta-loader-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.marmotta</groupId>
+ <artifactId>ostrich-model</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.marmotta</groupId>
+ <artifactId>ostrich-client</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ </dependencies>
+</project>
http://git-wip-us.apache.org/repos/asf/marmotta/blob/8f60cf64/loader/marmotta-loader-ostrich/src/main/java/org/apache/marmotta/loader/ostrich/OstrichLoaderBackend.java
----------------------------------------------------------------------
diff --git a/loader/marmotta-loader-ostrich/src/main/java/org/apache/marmotta/loader/ostrich/OstrichLoaderBackend.java b/loader/marmotta-loader-ostrich/src/main/java/org/apache/marmotta/loader/ostrich/OstrichLoaderBackend.java
new file mode 100644
index 0000000..bd6e49e
--- /dev/null
+++ b/loader/marmotta-loader-ostrich/src/main/java/org/apache/marmotta/loader/ostrich/OstrichLoaderBackend.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.marmotta.loader.ostrich;
+
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.configuration.Configuration;
+import org.apache.marmotta.loader.api.LoaderBackend;
+import org.apache.marmotta.loader.api.LoaderHandler;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Ostrich loader backend. Provides configuration for the OstrichLoaderHandler.
+ *
+ * @author Sebastian Schaffert (sschaffert@apache.org)
+ */
+public class OstrichLoaderBackend implements LoaderBackend {
+
+ /**
+ * Create the OstrichLoaderHandler used for bulk-loading, configured from the configuration passed as argument.
+ *
+ * @param configuration source of backend.ostrich.host, backend.ostrich.port and backend.ostrich.batchsize (defaults: localhost, 10000, 500000)
+ * @return a newly created OstrichLoaderHandler instance
+ */
+ @Override
+ public LoaderHandler createLoader(Configuration configuration) {
+ return new OstrichLoaderHandler(
+ configuration.getString("backend.ostrich.host", "localhost"),
+ configuration.getInt("backend.ostrich.port", 10000),
+ configuration.getLong("backend.ostrich.batchsize", 500000));
+ }
+
+ /**
+ * Return a unique identifier for the loader; used for identifying the loader to choose on the command line
+ * in case more than one loader implementation is available.
+ * <p/>
+ * Should match with the regular expression [a-z][a-z0-9]*
+ *
+ * @return the constant identifier "ostrich"
+ */
+ @Override
+ public String getIdentifier() {
+ return "ostrich";
+ }
+
+ /**
+ * Return any additional options that this backend offers (e.g. for connecting to a database etc).
+ * The options are collected in a HashSet, so the returned collection has no defined iteration order.
+ *
+ * @return a set containing the host (-H), port (-P) and batchsize (-B) options, each taking one argument
+ */
+ @Override
+ public Collection<Option> getOptions() {
+ Set<Option> options = new HashSet<>();
+
+ Option host =
+ OptionBuilder.withArgName("host")
+ .hasArgs(1)
+ .withDescription("hostname or IP address of Ostrich/LevelDB server")
+ .withLongOpt("host")
+ .create('H');
+ options.add(host);
+
+ Option port =
+ OptionBuilder.withArgName("port")
+ .hasArgs(1)
+ .withDescription("port used by Ostrich/LevelDB server")
+ .withLongOpt("port")
+ .create('P');
+ options.add(port);
+
+ Option batchSize =
+ OptionBuilder.withArgName("batchsize")
+ .hasArgs(1)
+ .withDescription("maximum number of statements to commit in one batch")
+ .withLongOpt("batchsize")
+ .create('B');
+ options.add(batchSize);
+
+ return options;
+ }
+}
http://git-wip-us.apache.org/repos/asf/marmotta/blob/8f60cf64/loader/marmotta-loader-ostrich/src/main/java/org/apache/marmotta/loader/ostrich/OstrichLoaderHandler.java
----------------------------------------------------------------------
diff --git a/loader/marmotta-loader-ostrich/src/main/java/org/apache/marmotta/loader/ostrich/OstrichLoaderHandler.java b/loader/marmotta-loader-ostrich/src/main/java/org/apache/marmotta/loader/ostrich/OstrichLoaderHandler.java
new file mode 100644
index 0000000..675d25a
--- /dev/null
+++ b/loader/marmotta-loader-ostrich/src/main/java/org/apache/marmotta/loader/ostrich/OstrichLoaderHandler.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.marmotta.loader.ostrich;
+
+import org.apache.marmotta.loader.api.LoaderHandler;
+import org.apache.marmotta.ostrich.sail.OstrichSail;
+import org.openrdf.model.Statement;
+import org.openrdf.rio.RDFHandlerException;
+import org.openrdf.sail.SailConnection;
+import org.openrdf.sail.SailException;
+
+/**
+ * Loader handler that adds parsed RDF statements to an Ostrich server through an OstrichSail connection.
+ *
+ * @author Sebastian Schaffert (sschaffert@apache.org)
+ */
+public class OstrichLoaderHandler implements LoaderHandler {
+
+ private OstrichSail sail;
+ private SailConnection con; // opened in initialise(), closed in shutdown()
+
+ long count = 0; // number of statements handled so far
+ long batchSize = 500000; // statements per commit; replaced by the constructor argument
+
+ public OstrichLoaderHandler(String host, int port, long batchSize) {
+ this.batchSize = batchSize;
+ this.sail = new OstrichSail(host,port);
+ }
+
+ /**
+ * Initialise the handler by initialising the Ostrich sail and obtaining the connection that all other
+ * methods of this handler operate on.
+ *
+ * @throws RDFHandlerException if the sail reports a SailException during initialisation or connection setup
+ */
+ @Override
+ public void initialise() throws RDFHandlerException {
+ try {
+ sail.initialize();
+ con = sail.getConnection();
+ } catch (SailException e) {
+ throw new RDFHandlerException("Could not establish Ostrich connection", e);
+ }
+ }
+
+ /**
+ * Perform cleanup on shutdown: close the connection, then shut down the sail. NOTE(review): if closing the
+ * connection throws, the sail is not shut down; if initialise() never obtained a connection, con is null here.
+ */
+ @Override
+ public void shutdown() throws RDFHandlerException {
+ try {
+ con.close();
+ sail.shutDown();
+ } catch (SailException e) {
+ throw new RDFHandlerException("Could not close Ostrich connection", e);
+ }
+
+ }
+
+ /**
+ * Signals the start of the RDF data by beginning a transaction on the connection. This method is called
+ * before any data is reported.
+ *
+ * @throws RDFHandlerException If the RDF handler has encountered an unrecoverable error.
+ */
+ @Override
+ public void startRDF() throws RDFHandlerException {
+ try {
+ con.begin();
+ } catch (SailException e) {
+ throw new RDFHandlerException("Could not start transaction", e);
+ }
+ }
+
+ /**
+ * Signals the end of the RDF data by committing the open transaction. This method is called when all data
+ * has been reported.
+ *
+ * @throws RDFHandlerException If the RDF handler has encountered an unrecoverable error.
+ */
+ @Override
+ public void endRDF() throws RDFHandlerException {
+ try {
+ con.commit();
+ } catch (SailException e) {
+ throw new RDFHandlerException("Could not commit transaction", e);
+ }
+ }
+
+ /**
+ * Handles a namespace declaration/definition. A namespace declaration
+ * associates a (short) prefix string with the namespace's URI. The prefix
+ * for default namespaces, which do not have an associated prefix, are
+ * represented as empty strings.
+ *
+ * @param prefix The prefix for the namespace, or an empty string in case of a
+ * default namespace.
+ * @param uri The URI that the prefix maps to.
+ * @throws RDFHandlerException If the RDF handler has encountered an unrecoverable error.
+ */
+ @Override
+ public void handleNamespace(String prefix, String uri) throws RDFHandlerException {
+ try {
+ con.setNamespace(prefix, uri);
+ } catch (SailException e) {
+ throw new RDFHandlerException("Could not add namespace", e);
+ }
+ }
+
+ /**
+ * Adds the statement to the connection and commits/restarts the transaction after every batchSize statements.
+ * NOTE(review): a batchSize of 0 makes the modulo below throw ArithmeticException, which is not caught here.
+ * @param st The statement.
+ * @throws RDFHandlerException If the RDF handler has encountered an unrecoverable error.
+ */
+ @Override
+ public void handleStatement(Statement st) throws RDFHandlerException {
+ try {
+ con.addStatement(st.getSubject(), st.getPredicate(), st.getObject(), st.getContext());
+
+ if (++count % batchSize == 0) {
+ con.commit();
+ con.begin();
+ }
+ } catch (SailException e) {
+ throw new RDFHandlerException("Could not add statement", e);
+ }
+ }
+
+ /**
+ * Handles a comment. Comments are ignored by this handler (the method body is empty).
+ *
+ * @param comment The comment.
+ * @throws RDFHandlerException If the RDF handler has encountered an unrecoverable error.
+ */
+ @Override
+ public void handleComment(String comment) throws RDFHandlerException {
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/marmotta/blob/8f60cf64/loader/marmotta-loader-ostrich/src/main/resources/META-INF/services/org.apache.marmotta.loader.api.LoaderBackend
----------------------------------------------------------------------
diff --git a/loader/marmotta-loader-ostrich/src/main/resources/META-INF/services/org.apache.marmotta.loader.api.LoaderBackend b/loader/marmotta-loader-ostrich/src/main/resources/META-INF/services/org.apache.marmotta.loader.api.LoaderBackend
new file mode 100644
index 0000000..ca1e9a1
--- /dev/null
+++ b/loader/marmotta-loader-ostrich/src/main/resources/META-INF/services/org.apache.marmotta.loader.api.LoaderBackend
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+org.apache.marmotta.loader.ostrich.OstrichLoaderBackend
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/marmotta/blob/8f60cf64/loader/marmotta-loader-ostrich/src/main/resources/logback.xml
----------------------------------------------------------------------
diff --git a/loader/marmotta-loader-ostrich/src/main/resources/logback.xml b/loader/marmotta-loader-ostrich/src/main/resources/logback.xml
new file mode 100644
index 0000000..ebc7937
--- /dev/null
+++ b/loader/marmotta-loader-ostrich/src/main/resources/logback.xml
@@ -0,0 +1,32 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<configuration>
+ <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
+ <encoder>
+ <pattern>%d{HH:mm:ss.SSS} %level %logger{15} - %m%n</pattern>
+ </encoder>
+ </appender>
+
+ <logger name="org.apache.zookeeper" level="WARN" />
+ <logger name="org.apache.hadoop" level="WARN" />
+
+
+ <root level="${root-level:-INFO}">
+ <appender-ref ref="CONSOLE"/>
+ </root>
+</configuration>
http://git-wip-us.apache.org/repos/asf/marmotta/blob/8f60cf64/loader/pom.xml
----------------------------------------------------------------------
diff --git a/loader/pom.xml b/loader/pom.xml
index dbdc0f1..4771f28 100644
--- a/loader/pom.xml
+++ b/loader/pom.xml
@@ -92,4 +92,14 @@
<module>marmotta-loader-berkeley</module>
</modules>
+ <profiles>
+ <profile>
+ <id>ostrich</id>
+ <modules>
+ <module>marmotta-loader-ostrich</module>
+ </modules>
+ </profile>
+ </profiles>
+
+
</project>