You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/11/04 15:24:57 UTC
svn commit: r1538622 - in /stanbol/trunk/entityhub/indexing/source/sesame:
./ src/ src/main/ src/main/java/ src/main/java/org/
src/main/java/org/apache/ src/main/java/org/apache/stanbol/
src/main/java/org/apache/stanbol/entityhub/ src/main/java/org/apa...
Author: rwesten
Date: Mon Nov 4 14:24:56 2013
New Revision: 1538622
URL: http://svn.apache.org/r1538622
Log:
STANBOL-1200: first commit for the Sesame Indexing Source (still work in progress)
Added:
stanbol/trunk/entityhub/indexing/source/sesame/ (with props)
stanbol/trunk/entityhub/indexing/source/sesame/pom.xml
stanbol/trunk/entityhub/indexing/source/sesame/src/
stanbol/trunk/entityhub/indexing/source/sesame/src/main/
stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/
stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/
stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/
stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/
stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/
stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/
stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/
stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/
stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/AbstractSesameBackend.java
stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/RdfIndexingSource.java
stanbol/trunk/entityhub/indexing/source/sesame/src/test/
stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/
stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/org/
stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/org/apache/
stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/org/apache/stanbol/
stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/org/apache/stanbol/entityhub/
stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/org/apache/stanbol/entityhub/indexing/
stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/org/apache/stanbol/entityhub/indexing/source/
stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/org/apache/stanbol/entityhub/indexing/source/sesame/
stanbol/trunk/entityhub/indexing/source/sesame/src/test/resources/
stanbol/trunk/entityhub/indexing/source/sesame/src/test/resources/log4j.properties
Propchange: stanbol/trunk/entityhub/indexing/source/sesame/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov 4 14:24:56 2013
@@ -0,0 +1 @@
+
Added: stanbol/trunk/entityhub/indexing/source/sesame/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/source/sesame/pom.xml?rev=1538622&view=auto
==============================================================================
--- stanbol/trunk/entityhub/indexing/source/sesame/pom.xml (added)
+++ stanbol/trunk/entityhub/indexing/source/sesame/pom.xml Mon Nov 4 14:24:56 2013
@@ -0,0 +1,149 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>apache-stanbol-entityhub-indexing</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ <relativePath>../..</relativePath>
+ </parent>
+
+ <artifactId>org.apache.stanbol.entityhub.indexing.source.sesame</artifactId>
+ <packaging>bundle</packaging>
+
+ <name>Apache Stanbol Entityhub Indexing Source for sesame</name>
+ <description>
+ Provides support for indexing RDF data by using any Sesame based RDF triple
+ Store
+ </description>
+
+ <licenses>
+ <license>
+ <name>Apache Software License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ <comments>A business-friendly OSS license</comments>
+ </license>
+ </licenses>
+
+ <scm>
+ <connection>
+ scm:svn:http://svn.apache.org/repos/asf/stanbol/trunk/entityhub/indexing/source/sesame
+ </connection>
+ <developerConnection>
+ scm:svn:https://svn.apache.org/repos/asf/stanbol/trunk/entityhub/indexing/source/sesame
+ </developerConnection>
+ <url>http://stanbol.apache.org</url>
+ </scm>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <instructions>
+ <Export-Package>
+ org.apache.stanbol.entityhub.indexing.source.sesame;version=${project.version}
+ </Export-Package>
+ </instructions>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <configuration>
+ <excludes>
+ <!-- AL20 licensed -->
+ <exclude>src/license/THIRD-PARTY.properties</exclude>
+
+ <!-- AL20 licensed files. See src/test/resources/README -->
+ <exclude>src/test/**/*.txt</exclude>
+ <exclude>src/test/**/*.nq</exclude>
+ <exclude>src/test/**/*.nt</exclude>
+ <exclude>src/test/**/*.config</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.commons.namespaceprefix.service</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.entityhub.indexing.core</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.entityhub.model.sesame</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.entityhub.ldpath</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ </dependency>
+
+ <dependency> <!-- the sesame repository API -->
+ <groupId>org.openrdf.sesame</groupId>
+ <artifactId>sesame-repository-api</artifactId>
+ </dependency>
+ <dependency> <!-- used to hold the repository config (provided as RDF graph) -->
+ <groupId>org.openrdf.sesame</groupId>
+ <artifactId>sesame-sail-memory</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.openrdf.sesame</groupId>
+ <artifactId>sesame-repository-sail</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ </dependency>
+ <!-- dependencies for testing -->
+ <dependency> <!-- used for debug level logging during tests -->
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
Added: stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/AbstractSesameBackend.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/AbstractSesameBackend.java?rev=1538622&view=auto
==============================================================================
--- stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/AbstractSesameBackend.java (added)
+++ stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/AbstractSesameBackend.java Mon Nov 4 14:24:56 2013
@@ -0,0 +1,374 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.entityhub.indexing.source.sesame;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+import java.util.concurrent.ThreadPoolExecutor;
+
+import javax.xml.datatype.XMLGregorianCalendar;
+
+
+import org.openrdf.model.BNode;
+import org.openrdf.model.Literal;
+import org.openrdf.model.Resource;
+import org.openrdf.model.Statement;
+import org.openrdf.model.Value;
+import org.openrdf.model.ValueFactory;
+import org.openrdf.repository.RepositoryConnection;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.repository.RepositoryResult;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import at.newmedialab.ldpath.api.backend.RDFBackend;
+
+/**
+ * Sesame Backend based on the code of
+ * <code>org.apache.marmotta.ldpath.backend.sesame.AbstractSesameBackend</code>
+ * (module <code>org.apache.marmotta:ldpath-backend-sesame:3.1.0-incubating</code>).
+ * <p>
+ * Implements the node inspection and literal conversion methods of
+ * {@link RDFBackend} for Sesame {@link Value}s and offers protected
+ * <code>*Internal</code> helpers that work on a parsed {@link ValueFactory} or
+ * {@link RepositoryConnection}. Creating nodes and listing subjects/objects is
+ * left to sub-classes.
+ * <p>
+ * TODO: as soon as the LDPath dependency is updated to the current
+ * Marmotta version this should be removed and extend the current Marmotta version
+ *
+ */
+public abstract class AbstractSesameBackend implements RDFBackend<Value> {
+
+    private static final Logger log = LoggerFactory.getLogger(AbstractSesameBackend.class);
+
+    /**
+     * Creates a Sesame URI for the parsed string by using the parsed factory.
+     *
+     * @param valueFactory the factory used to create the URI
+     * @param uri the URI as string
+     * @return the created URI
+     */
+    protected org.openrdf.model.URI createURIInternal(final ValueFactory valueFactory, String uri) {
+        return valueFactory.createURI(uri);
+    }
+
+    /**
+     * Creates a plain literal (no language, no datatype).
+     *
+     * @param valueFactory the factory used to create the literal
+     * @param content the lexical value of the literal
+     * @return the created literal
+     */
+    protected Literal createLiteralInternal(final ValueFactory valueFactory, String content) {
+        log.debug("creating literal with content \"{}\"",content);
+        return valueFactory.createLiteral(content);
+    }
+
+    /**
+     * Creates a literal with an optional language or datatype. If a datatype
+     * is parsed it takes precedence over the language.
+     *
+     * @param valueFactory the factory used to create the literal
+     * @param content the lexical value of the literal
+     * @param language the language or <code>null</code> if none
+     * @param type the datatype or <code>null</code> if none
+     * @return the created literal
+     */
+    protected Literal createLiteralInternal(final ValueFactory valueFactory, String content,
+            Locale language, URI type) {
+        if(log.isDebugEnabled()){
+            log.debug("creating literal with content \"{}\", language {}, datatype {}",
+                new Object[]{content,language,type});
+        }
+        if(type != null) {
+            return valueFactory.createLiteral(content, valueFactory.createURI(type.toString()));
+        }
+        if(language != null) {
+            return valueFactory.createLiteral(content, language.getLanguage());
+        }
+        return valueFactory.createLiteral(content);
+    }
+
+    /**
+     * Collects the objects of all statements with the parsed subject and property.
+     *
+     * @param connection the connection used to read the statements
+     * @param subject the subject of the statements
+     * @param property the predicate of the statements
+     * @param includeInferred passed on to
+     * {@link RepositoryConnection#getStatements(Resource, org.openrdf.model.URI, Value, boolean, Resource...)}
+     * @param context the contexts passed on to the connection
+     * @return the (duplicate free) objects
+     * @throws RepositoryException on any error while reading from the connection
+     */
+    protected Collection<Value> listObjectsInternal(RepositoryConnection connection,
+            Resource subject, org.openrdf.model.URI property, boolean includeInferred,
+            Resource...context)
+            throws RepositoryException {
+        final ValueFactory valueFactory = connection.getValueFactory();
+        final Set<Value> result = new HashSet<Value>();
+        final RepositoryResult<Statement> qResult = connection.getStatements(
+            merge(subject, valueFactory),
+            merge(property, valueFactory), null,
+            includeInferred, context);
+        try {
+            while(qResult.hasNext()) {
+                result.add(qResult.next().getObject());
+            }
+        } finally {
+            qResult.close(); //always release the result
+        }
+        return result;
+    }
+
+    /**
+     * Collects the subjects of all statements with the parsed property and object.
+     *
+     * @param connection the connection used to read the statements
+     * @param property the predicate of the statements
+     * @param object the object of the statements
+     * @param includeInferred passed on to
+     * {@link RepositoryConnection#getStatements(Resource, org.openrdf.model.URI, Value, boolean, Resource...)}
+     * @param context the contexts passed on to the connection
+     * @return the (duplicate free) subjects
+     * @throws RepositoryException on any error while reading from the connection
+     */
+    protected Collection<Value> listSubjectsInternal(final RepositoryConnection connection,
+            org.openrdf.model.URI property, Value object, boolean includeInferred,
+            Resource...context)
+            throws RepositoryException {
+        final ValueFactory valueFactory = connection.getValueFactory();
+        final Set<Value> result = new HashSet<Value>();
+        final RepositoryResult<Statement> qResult = connection.getStatements(null,
+            merge(property, valueFactory),
+            merge(object, valueFactory), includeInferred,
+            context);
+        try {
+            while(qResult.hasNext()) {
+                result.add(qResult.next().getSubject());
+            }
+        } finally {
+            qResult.close(); //always release the result
+        }
+        return result;
+    }
+
+    /**
+     * Merge the value given as argument into the value factory given as argument.
+     * URIs and BNodes are re-created by the parsed factory; all other values
+     * (including <code>null</code>) are returned unchanged.
+     *
+     * @param value the value to merge
+     * @param vf the value factory
+     * @param <T> the type of the value
+     * @return the merged value
+     */
+    @SuppressWarnings("unchecked")
+    protected <T extends Value> T merge(T value, ValueFactory vf) {
+        if(value instanceof org.openrdf.model.URI) {
+            return (T)vf.createURI(value.stringValue());
+        } else if(value instanceof BNode) {
+            return (T)vf.createBNode(((BNode) value).getID());
+        } else {
+            return value;
+        }
+    }
+
+    @Override
+    public abstract Literal createLiteral(String content);
+
+    @Override
+    public abstract Literal createLiteral(String content, Locale language, URI type);
+
+    @Override
+    public abstract org.openrdf.model.URI createURI(String uri);
+
+    @Override
+    public abstract Collection<Value> listObjects(Value subject, Value property);
+
+    @Override
+    public abstract Collection<Value> listSubjects(Value property, Value object);
+
+    @Override
+    @Deprecated
+    public boolean supportsThreading() {
+        return false;
+    }
+
+    @Override
+    @Deprecated
+    public ThreadPoolExecutor getThreadPool() {
+        return null;
+    }
+
+    /**
+     * Test whether the node passed as argument is a literal
+     *
+     * @param n the node to check
+     * @return true if the node is a literal
+     */
+    @Override
+    public boolean isLiteral(Value n) {
+        return n instanceof Literal;
+    }
+
+    /**
+     * Test whether the node passed as argument is a URI
+     *
+     * @param n the node to check
+     * @return true if the node is a URI
+     */
+    @Override
+    public boolean isURI(Value n) {
+        return n instanceof org.openrdf.model.URI;
+    }
+
+    /**
+     * Test whether the node passed as argument is a blank node
+     *
+     * @param n the node to check
+     * @return true if the node is a blank node
+     */
+    @Override
+    public boolean isBlank(Value n) {
+        return n instanceof BNode;
+    }
+
+    /**
+     * Return the language of the literal node passed as argument.
+     *
+     * @param n the literal node for which to return the language
+     * @return a Locale representing the language of the literal, or null if the literal node has no language
+     * @throws IllegalArgumentException in case the node is no literal
+     */
+    @Override
+    public Locale getLiteralLanguage(Value n) {
+        final String language = asLiteral(n).getLanguage();
+        return language != null ? new Locale(language) : null;
+    }
+
+    /**
+     * Return the URI of the type of the literal node passed as argument.
+     *
+     * @param n the literal node for which to return the type
+     * @return a URI representing the type of the literal content, or null if
+     * the literal is untyped or the datatype is not a valid {@link URI}
+     * @throws IllegalArgumentException in case the node is no literal
+     */
+    @Override
+    public URI getLiteralType(Value n) {
+        final org.openrdf.model.URI datatype = asLiteral(n).getDatatype();
+        if(datatype == null) {
+            return null;
+        }
+        try {
+            return new URI(datatype.stringValue());
+        } catch (URISyntaxException e) {
+            log.error("literal datatype was not a valid URI: {}",datatype);
+            return null;
+        }
+    }
+
+    /**
+     * Return the string value of a node. For a literal, this will be the content, for a URI node it will be the
+     * URI itself, and for a blank node it will be the identifier of the node.
+     *
+     * @param value the node
+     * @return the string value of the node
+     */
+    @Override
+    public String stringValue(Value value) {
+        return value.stringValue();
+    }
+
+    @Override
+    public BigDecimal decimalValue(Value node) {
+        return asLiteral(node).decimalValue();
+    }
+
+    @Override
+    public BigInteger integerValue(Value node) {
+        return asLiteral(node).integerValue();
+    }
+
+    @Override
+    public Boolean booleanValue(Value node) {
+        return asLiteral(node).booleanValue();
+    }
+
+    @Override
+    public Date dateTimeValue(Value node) {
+        XMLGregorianCalendar cal = asLiteral(node).calendarValue();
+        //TODO: check if we need to deal with timezone and Local here
+        return cal.toGregorianCalendar().getTime();
+    }
+
+    @Override
+    public Date dateValue(Value node) {
+        XMLGregorianCalendar cal = asLiteral(node).calendarValue();
+        return cal.toGregorianCalendar().getTime();
+    }
+
+    @Override
+    public Date timeValue(Value node) {
+        //TODO: Unless someone knows how to create a Date that only has the time
+        //      from a XMLGregorianCalendar
+        return dateTimeValue(node);
+    }
+
+    @Override
+    public Long longValue(Value node) {
+        return asLiteral(node).longValue();
+    }
+
+    @Override
+    public Double doubleValue(Value node) {
+        return asLiteral(node).doubleValue();
+    }
+
+    @Override
+    public Float floatValue(Value node) {
+        return asLiteral(node).floatValue();
+    }
+
+    @Override
+    public Integer intValue(Value node) {
+        return asLiteral(node).intValue();
+    }
+
+    /**
+     * Casts the parsed node to a {@link Literal}. A <code>null</code> node is
+     * returned as <code>null</code> so that callers fail with the same
+     * {@link NullPointerException} as a direct cast would cause.
+     *
+     * @param node the node expected to be a literal
+     * @return the node as literal
+     * @throws IllegalArgumentException in case the node is no literal
+     */
+    private Literal asLiteral(Value node) {
+        if(node == null || node instanceof Literal) {
+            return (Literal)node;
+        }
+        throw new IllegalArgumentException("Value "+node.stringValue()+" is not a literal "
+            + "but of type "+debugType(node));
+    }
+
+    /**
+     * Prints the type (URI,bNode,literal) by inspecting the parsed {@link Value}
+     * to improve error messages and other loggings. In case of literals
+     * also the {@link #getLiteralType(Value) literal type} is printed
+     * @param value the value or <code>null</code>
+     * @return the type as string.
+     */
+    protected String debugType(Value value) {
+        if(value == null) {
+            return "null";
+        } else if(isURI(value)) {
+            return "URI";
+        } else if(isBlank(value)) {
+            return "bNode";
+        } else {
+            return "literal ("+getLiteralType(value)+")";
+        }
+    }
+
+}
Added: stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/RdfIndexingSource.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/RdfIndexingSource.java?rev=1538622&view=auto
==============================================================================
--- stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/RdfIndexingSource.java (added)
+++ stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/RdfIndexingSource.java Mon Nov 4 14:24:56 2013
@@ -0,0 +1,541 @@
+package org.apache.stanbol.entityhub.indexing.source.sesame;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.URI;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.apache.stanbol.entityhub.indexing.core.EntityDataIterable;
+import org.apache.stanbol.entityhub.indexing.core.EntityDataIterator;
+import org.apache.stanbol.entityhub.indexing.core.EntityDataProvider;
+import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
+import org.apache.stanbol.entityhub.model.sesame.RdfRepresentation;
+import org.apache.stanbol.entityhub.model.sesame.RdfValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.openrdf.model.BNode;
+import org.openrdf.model.Graph;
+import org.openrdf.model.Literal;
+import org.openrdf.model.Model;
+import org.openrdf.model.Resource;
+import org.openrdf.model.Statement;
+import org.openrdf.model.Value;
+import org.openrdf.model.ValueFactory;
+import org.openrdf.model.impl.TreeModel;
+import org.openrdf.model.util.ModelUtil;
+import org.openrdf.repository.Repository;
+import org.openrdf.repository.RepositoryConnection;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.repository.RepositoryResult;
+import org.openrdf.repository.config.RepositoryConfig;
+import org.openrdf.repository.config.RepositoryConfigException;
+import org.openrdf.repository.config.RepositoryConfigUtil;
+import org.openrdf.repository.config.RepositoryFactory;
+import org.openrdf.repository.config.RepositoryRegistry;
+import org.openrdf.repository.sail.SailRepository;
+import org.openrdf.rio.RDFFormat;
+import org.openrdf.rio.RDFParseException;
+import org.openrdf.rio.RDFParser;
+import org.openrdf.rio.Rio;
+import org.openrdf.sail.SailConnection;
+import org.openrdf.sail.memory.MemoryStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import at.newmedialab.ldpath.api.backend.RDFBackend;
+
+public class RdfIndexingSource extends AbstractSesameBackend implements EntityDataIterable, EntityDataProvider, RDFBackend<Value> {
+
+ private final Logger log = LoggerFactory.getLogger(RdfIndexingSource.class);
+
+ public static final String PARAM_REPOSITORY_CONFIG = "config";
+
+ public static final String DEFAULT_REPOSITORY_CONFIG = "repository.ttl";
+
+ protected ValueFactory sesameFactory;
+
+ protected RdfValueFactory vf;
+
+ Repository repository;
+ //protected RepositoryConnection connection;
+
+
+
+ /**
+ * If {@link BNode} being values of outgoing triples should be followed.
+ */
+ protected boolean followBNodeState = true; //TODO: make configurable
+
+ private Resource[] contexts = new Resource[]{}; //TODO: make configurable
+
+ private boolean includeInferred = true; //TODO: make configurable
+
+ protected RepositoryConfig repoConfig;
+ private RepositoryConnection ldpathConnection;
+ private Lock ldpathConnectionLock = new ReentrantLock();
+
+ private RepositoryConnection entityDataProviderConnection;
+ private Lock entityDataProviderConnectionLock = new ReentrantLock();
+ /**
+ * {@link EntityDataIterator}s created by {@link #entityDataIterator()}
+ * add themselves to this list while they are active. Calling {@link #close()}
+ * on this indexing source also closes all iterators in this list.
+ */
+ protected final List<EntityDataIterator> entityDataIterators = new CopyOnWriteArrayList<EntityDataIterator>();
+
+    /**
+     * Resolves the Sesame repository configuration file relative to the
+     * config folder of the {@link IndexingConfig} and loads it. The file name
+     * is read from the {@link #PARAM_REPOSITORY_CONFIG} parameter and defaults
+     * to {@link #DEFAULT_REPOSITORY_CONFIG}.
+     *
+     * @param config the configuration; MUST contain the {@link IndexingConfig}
+     * under the key {@link IndexingConfig#KEY_INDEXING_CONFIG}
+     * @throws IllegalArgumentException if the {@link IndexingConfig} is
+     * missing or the repository configuration file does not exist
+     */
+    @Override
+    public void setConfiguration(Map<String,Object> config) {
+        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
+        if(indexingConfig == null){
+            //fail with a clear message instead of a NullPointerException below
+            throw new IllegalArgumentException("The parsed configuration does not "
+                + "contain the IndexingConfig (key: '"
+                + IndexingConfig.KEY_INDEXING_CONFIG + "')!");
+        }
+        Object value = config.get(PARAM_REPOSITORY_CONFIG);
+        String configFileName = value != null ? value.toString() : DEFAULT_REPOSITORY_CONFIG;
+        File repoConfigFile = new File(indexingConfig.getConfigFolder(), configFileName);
+        if(!repoConfigFile.isFile()){
+            throw new IllegalArgumentException("The configured Sesame Repository configuration file "
+                + repoConfigFile +" is missing. Please use the '"+PARAM_REPOSITORY_CONFIG
+                + "' parameter to configure the actual configuration file (relative "
+                + "to the config folder '"+indexingConfig.getConfigFolder()+"')");
+        }
+        //read the config (an RDF file)
+        this.repoConfig = loadRepositoryConfig(repoConfigFile);
+    }
+
+    /**
+     * Loads the parsed RDF file into a temporary in-memory Sesame repository
+     * and extracts the single {@link RepositoryConfig} it is expected to hold.
+     * The temporary repository is shut down before this method returns.
+     *
+     * @param repoConfigFile the RDF file with the repository configuration
+     * @return the validated repository configuration
+     * @throws IllegalArgumentException if the RDF format cannot be determined
+     * from the file name, the file cannot be read or parsed, it holds none or
+     * more than one repository configuration, or the configuration is invalid
+     * @throws IllegalStateException if the in-memory repository cannot be used
+     */
+    private RepositoryConfig loadRepositoryConfig(File repoConfigFile) {
+        RDFFormat format = Rio.getParserFormatForFileName(repoConfigFile.getName());
+        if(format == null){
+            throw new IllegalArgumentException("Unable to determine the RDF format of '"
+                + repoConfigFile + "' based on its file name!");
+        }
+        Repository configRepo = new SailRepository(new MemoryStore());
+        try {
+            //(1) load the file into the in-memory repository
+            RepositoryConnection con = null;
+            try {
+                //a repository needs to be initialised before connections are requested
+                configRepo.initialize();
+                con = configRepo.getConnection();
+                //NOTE(review): RepositoryConfigUtil is expected to look up the
+                //  configuration via the context of the repositoryID statement,
+                //  so the file is loaded into a named context - confirm against
+                //  the Sesame version in use
+                org.openrdf.model.URI configContext = con.getValueFactory().createURI(
+                    "urn:stanbol.entityhub:indexing.source.sesame:config.context");
+                FileInputStream in = new FileInputStream(repoConfigFile);
+                try {
+                    //the file URI is used as base to resolve relative URIs
+                    con.add(new InputStreamReader(in, Charset.forName("UTF-8")),
+                        repoConfigFile.toURI().toString(), format, configContext);
+                } finally {
+                    try { //do not leak the file handle
+                        in.close();
+                    } catch (IOException e) {/* ignore */}
+                }
+                con.commit();
+            } catch (RDFParseException e) {
+                throw new IllegalArgumentException("Unable to parse '"
+                    + repoConfigFile+ "' using RDF format '"+ format +"'!", e);
+            } catch (IOException e) {
+                throw new IllegalArgumentException("Unable to access '"
+                    + repoConfigFile+ "'!", e);
+            } catch (RepositoryException e) {
+                throw new IllegalStateException("Unable to load '"
+                    + repoConfigFile+ "' to inmemory Sail!", e);
+            } finally {
+                if(con != null){
+                    try {
+                        con.close();
+                    } catch (RepositoryException e) {/* ignore */}
+                }
+            }
+            //(2) extract the one and only repository configuration
+            RepositoryConfig repoConfig;
+            try {
+                Set<String> repoNames = RepositoryConfigUtil.getRepositoryIDs(configRepo);
+                if(repoNames.isEmpty()){
+                    throw new IllegalArgumentException("Repository configuration file '"
+                        +repoConfigFile+"' DOES NOT contain a repository configuration!");
+                } else if(repoNames.size() > 1){
+                    throw new IllegalArgumentException("Repository configuration file '"
+                        +repoConfigFile+"' MUST only contain a single repository configuration!");
+                }
+                repoConfig = RepositoryConfigUtil.getRepositoryConfig(configRepo,
+                    repoNames.iterator().next());
+                repoConfig.validate();
+            } catch (RepositoryException e) {
+                throw new IllegalStateException("Unable to read RepositoryConfiguration from the "
+                    + "in-memory Sail!",e);
+            } catch (RepositoryConfigException e) {
+                throw new IllegalArgumentException("Repository Configuration in '"
+                    + repoConfigFile + "' is not valid!",e);
+            }
+            if(repoConfig.getRepositoryImplConfig() == null){
+                throw new IllegalArgumentException("Missing RepositoryImpl config for "
+                    + "config "+repoConfig.getID()+" of file "+repoConfigFile+"!");
+            }
+            return repoConfig;
+        } finally {
+            //the temporary repository is no longer needed, regardless of the outcome
+            try {
+                configRepo.shutDown();
+            } catch (RepositoryException e) { /* ignore */ }
+        }
+    }
+
+ /**
+ * This indexing source always requires a call to {@link #initialise()}.
+ * @return always <code>true</code>
+ */
+ @Override
+ public boolean needsInitialisation() {
+ return true;
+ }
+
+    /**
+     * Creates the Sesame {@link Repository} described by the repository
+     * configuration loaded by {@link #setConfiguration(Map)}.
+     *
+     * @throws IllegalStateException if no repository configuration was loaded,
+     * no {@link RepositoryFactory} is registered for the configured
+     * implementation type or the factory fails to create the repository
+     */
+    @Override
+    public void initialise() {
+        if(repoConfig == null){
+            //fail with a clear message instead of a NullPointerException below
+            throw new IllegalStateException("Unable to initialise Repository because "
+                + "no repository configuration is present. setConfiguration(..) "
+                + "MUST be called first!");
+        }
+        final String implType = repoConfig.getRepositoryImplConfig().getType();
+        final String repoInfo = "(id: " + repoConfig.getID()+ ", title: "
+            + repoConfig.getTitle() + ", impl: " + implType + ")";
+        RepositoryFactory factory = RepositoryRegistry.getInstance().get(implType);
+        if(factory == null){
+            throw new IllegalStateException("Unable to initialise Repository "
+                + repoInfo + " because no "
+                + "RepositoryFactory is present for the specified implementation!");
+        }
+        try {
+            //NOTE(review): the created repository is not initialised here and
+            //  'sesameFactory'/'vf' are not assigned - confirm this happens
+            //  elsewhere before connections are requested
+            repository = factory.getRepository(repoConfig.getRepositoryImplConfig());
+        } catch (RepositoryConfigException e) {
+            throw new IllegalStateException("Unable to initialise Repository "
+                + repoInfo + "!", e);
+        }
+    }
+
+    /**
+     * Closes all still active {@link EntityDataIterator}s, releases the
+     * connections used for LDPath and the EntityDataProvider and finally
+     * shuts down the repository (if one was created).
+     */
+    @Override
+    public void close() {
+        //first close still active RdfEntityDataIterator instances. Iterators
+        //remove themselves from the (copy-on-write) list while closing
+        for(EntityDataIterator edi : entityDataIterators){
+            edi.close();
+        }
+        //close connections used for LDPath and EntityDataProvider
+        ungetLdPathConnection();
+        ungetEntityDataProviderConnection();
+        //finally shutdown the repository
+        if(repository != null){ //null if initialise() was not called or failed
+            try {
+                repository.shutDown();
+            } catch (RepositoryException e) {
+                log.warn("Error while closing Sesame Connection", e);
+            }
+        }
+    }
+
+ /**
+ * Creates the {@link Representation} for the entity with the parsed id by
+ * passing the EntityDataProvider connection and the URI created for the id
+ * to <code>createRepresentationGraph(..)</code>.
+ * @param id the id (URI) of the entity
+ * @return the value returned by <code>createRepresentationGraph(..)</code>
+ * @throws IllegalStateException on any {@link RepositoryException}
+ */
+ @Override
+ public Representation getEntityData(String id) {
+ try {
+ return createRepresentationGraph(getEntityDataProviderConnection(),
+ sesameFactory.createURI(id));
+ } catch (RepositoryException e) {
+ //release the connection that caused the error before reporting it
+ ungetEntityDataProviderConnection();
+ throw new IllegalStateException("Unable to create Representation '"
+ + id + "'!", e);
+ }
+ }
+
+    /**
+     * Creates a new iterator over the entities of the repository by using the
+     * current BNode, inferencing and context settings of this source.
+     *
+     * @return the iterator
+     * @throws IllegalStateException if the iterator cannot be created because
+     * of a {@link RepositoryException}
+     */
+    @Override
+    public EntityDataIterator entityDataIterator() {
+        try {
+            return new RdfEntityDataIterator(followBNodeState, includeInferred, contexts);
+        } catch (RepositoryException e) {
+            throw new IllegalStateException("Unable to create EntityDataIterator for "
+                + "Sesame Repository '"+ repoConfig.getID() + "'!", e);
+        }
+    }
+
+    /**
+     * {@link EntityDataIterator} that reads all statements of the repository
+     * over its own connection and groups consecutive statements with the same
+     * URI subject to a {@link Representation}. Statements with non-URI
+     * subjects are skipped while searching for the next entity.
+     */
+    protected class RdfEntityDataIterator implements EntityDataIterator {
+
+        protected final RepositoryConnection connection;
+        protected final RepositoryResult<Statement> stdItr;
+        protected final boolean followBNodes;
+
+        /** Not referenced within this iterator. */
+        private org.openrdf.model.URI currentEntity = null;
+        /**
+         * Look-ahead: the last {@link Statement} read from {@link #stdItr} that
+         * is not yet part of a Representation. <code>null</code> if no such
+         * statement is available.
+         */
+        private Statement currentStd = null;
+        /**
+         * The current Representation as created by {@link #next()}
+         */
+        protected RdfRepresentation currentRep;
+        /**
+         * If {@link #currentStd} holds the first {@link Statement} of the next
+         * Entity and {@link #stdItr} is positioned right after it.
+         */
+        private boolean nextInitialised = false;
+
+        /**
+         * Opens a connection to the repository and starts reading all statements.
+         *
+         * @param followBNodes if statements of {@link BNode}s referenced by
+         * entities should be included in the Representations
+         * @param includeInferred if inferred statements should be included
+         * @param contexts the contexts to read the statements from
+         * @throws RepositoryException if the connection or the statement
+         * iterator cannot be created
+         */
+        protected RdfEntityDataIterator(boolean followBNodes,
+                boolean includeInferred, Resource...contexts) throws RepositoryException{
+            this.followBNodes = followBNodes;
+            this.connection = repository.getConnection();
+            boolean initialised = false;
+            try {
+                this.stdItr = connection.getStatements(null, null, null, includeInferred, contexts);
+                initialised = true;
+            } finally {
+                if(!initialised){ //do not leak the connection on failures
+                    try {
+                        connection.close();
+                    } catch (RepositoryException e) { /* ignore */ }
+                }
+            }
+            entityDataIterators.add(this);
+        }
+
+        @Override
+        public boolean hasNext() {
+            if(nextInitialised){
+                return true;
+            }
+            try {
+                //read until the look-ahead holds a statement with a URI subject.
+                //NOTE: currentStd is null on the first call and after the last
+                //statement was consumed
+                while(currentStd == null ||
+                        !(currentStd.getSubject() instanceof org.openrdf.model.URI)){
+                    if(!stdItr.hasNext()){
+                        currentStd = null;
+                        return false;
+                    }
+                    currentStd = stdItr.next();
+                }
+                nextInitialised = true;
+                return true;
+            } catch (RepositoryException e) {
+                //exception type kept for compatibility with existing callers
+                throw new IllegalArgumentException("Exceptions while reading "
+                    + "Statements after " + currentStd ,e);
+            }
+        }
+
+        @Override
+        public String next() {
+            if(!hasNext()){
+                currentRep = null;
+                throw new NoSuchElementException();
+            }
+            final org.openrdf.model.URI subject =
+                    (org.openrdf.model.URI)currentStd.getSubject();
+            nextInitialised = false;
+            currentRep = vf.createRdfRepresentation(subject);
+            try {
+                createRepresentation(subject, currentRep.getModel());
+            } catch (RepositoryException e) {
+                currentRep = null;
+                throw new IllegalStateException("Unable to read statements "
+                    + "for Entity " + subject +"!",e);
+            }
+            return subject.toString();
+        }
+
+        /**
+         * Creates a representation by adding the look-ahead statement read by
+         * {@link #hasNext()} (if it belongs to the subject) and consuming
+         * Statements from the {@link #stdItr} until the subject changes. The
+         * first statement with a different subject is kept as new look-ahead.
+         * If {@link #followBNodes} is enabled it also includes statements
+         * extracted for {@link BNode}s that are objects of the added statements.
+         * @param subject the subject of the Representation to create
+         * @param model the model to add the Statements
+         * @throws RepositoryException on any error while reading statements
+         */
+        protected void createRepresentation(org.openrdf.model.URI subject, final Model model)
+                throws RepositoryException {
+            final Set<BNode> bnodes = followBNodes ? new HashSet<BNode>() : null;
+            //the first statement of the entity was already read by hasNext()
+            if(currentStd != null && subject.equals(currentStd.getSubject())){
+                final Statement first = currentStd;
+                currentStd = null; //consumed
+                collect(first, model, bnodes);
+            }
+            Statement lookAhead = null;
+            while(lookAhead == null && stdItr.hasNext()){
+                final Statement std = stdItr.next();
+                if(subject.equals(std.getSubject())){
+                    collect(std, model, bnodes);
+                } else { //the subject has changed ... stop here
+                    lookAhead = std;
+                }
+            }
+            //null if all statements are consumed
+            currentStd = lookAhead;
+            if(bnodes != null){ //process BNodes
+                final Set<BNode> visited = new HashSet<BNode>();
+                for(BNode bnode : bnodes){
+                    visited.add(bnode);
+                    extractRepresentation(connection, model, bnode, visited);
+                }
+            }
+        }
+
+        /**
+         * Adds the statement to the model and remembers a {@link BNode} object
+         * if BNodes are collected.
+         * @param std the statement to add
+         * @param model the model to add the statement to
+         * @param bnodes the set used to collect referenced BNodes or
+         * <code>null</code> if BNodes are not followed
+         */
+        private void collect(Statement std, Model model, Set<BNode> bnodes) {
+            model.add(std);
+            if(bnodes != null){ //keep referenced BNodes
+                final Value object = std.getObject();
+                if(object instanceof BNode){
+                    bnodes.add((BNode)object);
+                }
+            } //else do not follow BNode values
+        }
+
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException("read-only iterator!");
+        }
+
+        @Override
+        public Representation getRepresentation() {
+            if(currentRep == null){
+                throw new NoSuchElementException();
+            } else {
+                return currentRep;
+            }
+        }
+
+        /**
+         * Unregisters this iterator from the indexing source and releases the
+         * statement iterator and the connection. Errors while closing are
+         * ignored.
+         */
+        @Override
+        public void close() {
+            entityDataIterators.remove(this);
+            try {
+                stdItr.close();
+            } catch (RepositoryException e) { /* ignore */ }
+            try {
+                connection.close();
+            } catch (RepositoryException e) { /* ignore */ }
+        }
+
+    }
+
+ /**
+ * Extracts the triples that belong to the {@link Representation} with the
+ * parsed id from the Sesame repository.
+ * @param con the repository connection
+ * @param uri the subject of the Representation to extract
+ * @return the representation with the extracted data.
+ * @throws RepositoryException
+ */
+ protected RdfRepresentation createRepresentationGraph(RepositoryConnection con,
+ org.openrdf.model.URI uri) throws RepositoryException{
+ RdfRepresentation rep = vf.createRdfRepresentation(uri);
+ Model model = rep.getModel();
+ extractRepresentation(con, model, uri,
+ followBNodeState ? new HashSet<BNode>() : null);
+ return rep;
+ }
+
+ /**
+ * Extracts all {@link Statement}s part of the Representation. If
+ * {@link #followBNodeState} this is called recursively for {@link Statement}s
+ * where the value is an {@link BNode}.
+ */
+ protected void extractRepresentation(RepositoryConnection con,Model model, Resource node, Set<BNode> visited) throws RepositoryException{
+ //we need all the outgoing relations and also want to follow bNodes until
+ //the next UriRef. However we are not interested in incoming relations!
+ RepositoryResult<Statement> outgoing = con.getStatements(node, null, null, includeInferred, contexts);
+ Statement statement;
+ Set<BNode> bnodes = followBNodeState ? new HashSet<BNode>() : null;
+ while(outgoing.hasNext()){
+ statement = outgoing.next();
+ model.add(statement);
+ if(followBNodeState){
+ Value object = statement.getObject();
+ if(object instanceof BNode && !visited.contains(object)){
+ bnodes.add((BNode)object);
+ }
+ } //else do not follow values beeing BNodes
+ }
+ outgoing.close();
+ if(followBNodeState){
+ for(BNode bnode : bnodes){
+ visited.add(bnode);
+ //TODO: recursive calls could cause stackoverflows with wired graphs
+ extractRepresentation(con, model, bnode, visited);
+ }
+ }
+ }
+
+ /* -------------------------------------------------------------------------
+ * LDPath Backend methods
+ * -------------------------------------------------------------------------
+ */
+
+ @Override
+ public Literal createLiteral(String content) {
+     //delegates to createLiteralInternal(..) by parsing the sesameFactory
+     final Literal literal = createLiteralInternal(sesameFactory, content);
+     return literal;
+ }
+
+ @Override
+ public Literal createLiteral(String content, Locale language, URI type) {
+     //delegates to createLiteralInternal(..); language and type are
+     //forwarded unchanged
+     final Literal literal = createLiteralInternal(
+         sesameFactory, content, language, type);
+     return literal;
+ }
+
+ @Override
+ public org.openrdf.model.URI createURI(String uri) {
+     //delegates to createURIInternal(..) by parsing the sesameFactory
+     final org.openrdf.model.URI created = createURIInternal(sesameFactory, uri);
+     return created;
+ }
+
+ /**
+  * Lists the objects of all statements matching the parsed subject and
+  * property by using the shared LDPath connection. If accessing the
+  * repository fails the LDPath connection is released, so that a new one
+  * is created on the next call.
+  * @param subject the subject of the triple pattern. MUST NOT be a Literal
+  * @param property the property of the triple pattern. Converted to an URI
+  * by using its string value if it is not already one
+  * @return the objects
+  * @throws IllegalStateException if the subject is a Literal or on any
+  * error while accessing the repository
+  */
+ @Override
+ public Collection<Value> listObjects(Value subject, Value property) {
+     //explicitly check the type instead of catching ClassCastExceptions,
+     //as those could also be thrown for unrelated reasons by called methods
+     //NOTE: null is accepted as it was by the previously used cast
+     if(subject != null && !(subject instanceof Resource)){
+         throw new IllegalStateException("Subject of triple pattern MUST NOT be "
+             + "a Literal (TriplePattern: "+subject+", "+property+", null)!");
+     }
+     try {
+         return listObjectsInternal(getLdPathConnection(), (Resource)subject,
+             asUri(property), includeInferred, contexts);
+     } catch (RepositoryException e) {
+         ungetLdPathConnection();
+         throw new IllegalStateException("Exception while accessing values for "
+             + "TriplePattern: "+subject+", "+property+", null!",e);
+     }
+ }
+
+ /**
+  * Lists the subjects of all statements matching the parsed property and
+  * object by using the shared LDPath connection. On a
+  * {@link RepositoryException} the LDPath connection is released and an
+  * {@link IllegalStateException} is thrown.
+  */
+ @Override
+ public Collection<Value> listSubjects(Value property, Value object) {
+     final Collection<Value> subjects;
+     try {
+         final RepositoryConnection con = getLdPathConnection();
+         final org.openrdf.model.URI predicate = asUri(property);
+         subjects = listSubjectsInternal(con, predicate, object,
+             includeInferred, contexts);
+     } catch (RepositoryException e) {
+         ungetLdPathConnection();
+         throw new IllegalStateException("Exception while accessing values for "
+             + "TriplePattern: null, "+property+", "+object+"!",e);
+     }
+     return subjects;
+ }
+
+ /**
+  * Getter for the shared connection used by the LDPath backend methods.
+  * The connection is created on the first call (and again after it was
+  * released by {@link #ungetLdPathConnection()}).
+  * @return the connection. Never <code>null</code>
+  * @throws RepositoryException if the connection can not be created
+  */
+ protected RepositoryConnection getLdPathConnection() throws RepositoryException {
+     //work on a local reference so that a concurrent call to
+     //ungetLdPathConnection() can not cause null to be returned
+     //NOTE(review): the unlocked read assumes the field is declared
+     //     volatile - confirm at the field declaration
+     RepositoryConnection con = ldpathConnection;
+     if(con == null){
+         ldpathConnectionLock.lock();
+         try {
+             con = ldpathConnection;
+             if(con == null){
+                 con = repository.getConnection();
+                 ldpathConnection = con;
+             }
+         } finally {
+             ldpathConnectionLock.unlock();
+         }
+     }
+     return con;
+ }
+
+ protected void ungetLdPathConnection() {
+ ldpathConnectionLock.lock();
+ try {
+ ldpathConnection.close();
+ ldpathConnection = null;
+ } catch (RepositoryException e1) { /* ignore */
+
+ } finally {
+ ldpathConnectionLock.unlock();
+ }
+ }
+
+ /**
+  * Getter for the shared <code>entityDataProviderConnection</code>. The
+  * connection is created on the first call (and again after it was released
+  * by {@link #ungetEntityDataProviderConnection()}).
+  * @return the connection. Never <code>null</code>
+  * @throws RepositoryException if the connection can not be created
+  */
+ protected RepositoryConnection getEntityDataProviderConnection() throws RepositoryException {
+     //work on a local reference so that a concurrent call to
+     //ungetEntityDataProviderConnection() can not cause null to be returned
+     //NOTE(review): the unlocked read assumes the field is declared
+     //     volatile - confirm at the field declaration
+     RepositoryConnection con = entityDataProviderConnection;
+     if(con == null){
+         entityDataProviderConnectionLock.lock();
+         try {
+             con = entityDataProviderConnection;
+             if(con == null){
+                 con = repository.getConnection();
+                 entityDataProviderConnection = con;
+             }
+         } finally {
+             entityDataProviderConnectionLock.unlock();
+         }
+     }
+     return con;
+ }
+
+ protected void ungetEntityDataProviderConnection() {
+ entityDataProviderConnectionLock.lock();
+ try {
+ entityDataProviderConnection.close();
+ entityDataProviderConnection = null;
+ } catch (RepositoryException e1) { /* ignore */
+
+ } finally {
+ entityDataProviderConnectionLock.unlock();
+ }
+ }
+
+
+ /**
+  * Returns the parsed value as URI. Values that are no URIs are converted
+  * by parsing their string value to {@link #createURI(String)}.
+  */
+ private org.openrdf.model.URI asUri(Value property){
+     return property instanceof org.openrdf.model.URI
+         ? (org.openrdf.model.URI)property
+         : createURI(property.stringValue());
+ }
+
+
+}
Added: stanbol/trunk/entityhub/indexing/source/sesame/src/test/resources/log4j.properties
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/source/sesame/src/test/resources/log4j.properties?rev=1538622&view=auto
==============================================================================
--- stanbol/trunk/entityhub/indexing/source/sesame/src/test/resources/log4j.properties (added)
+++ stanbol/trunk/entityhub/indexing/source/sesame/src/test/resources/log4j.properties Mon Nov 4 14:24:56 2013
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Root logger option
+log4j.rootLogger=INFO, stdout
+
+# Direct log messages to stdout
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.Target=System.out
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
+log4j.logger.org.apache.stanbol.entityhub.indexing=DEBUG
\ No newline at end of file