You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/11/04 15:24:57 UTC

svn commit: r1538622 - in /stanbol/trunk/entityhub/indexing/source/sesame: ./ src/ src/main/ src/main/java/ src/main/java/org/ src/main/java/org/apache/ src/main/java/org/apache/stanbol/ src/main/java/org/apache/stanbol/entityhub/ src/main/java/org/apa...

Author: rwesten
Date: Mon Nov  4 14:24:56 2013
New Revision: 1538622

URL: http://svn.apache.org/r1538622
Log:
STANBOL-1200: first commit for the Sesame Indexing Source (still work in progress)

Added:
    stanbol/trunk/entityhub/indexing/source/sesame/   (with props)
    stanbol/trunk/entityhub/indexing/source/sesame/pom.xml
    stanbol/trunk/entityhub/indexing/source/sesame/src/
    stanbol/trunk/entityhub/indexing/source/sesame/src/main/
    stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/
    stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/
    stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/
    stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/
    stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/
    stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/
    stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/
    stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/
    stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/AbstractSesameBackend.java
    stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/RdfIndexingSource.java
    stanbol/trunk/entityhub/indexing/source/sesame/src/test/
    stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/
    stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/org/
    stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/org/apache/
    stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/org/apache/stanbol/
    stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/org/apache/stanbol/entityhub/
    stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/org/apache/stanbol/entityhub/indexing/
    stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/org/apache/stanbol/entityhub/indexing/source/
    stanbol/trunk/entityhub/indexing/source/sesame/src/test/java/org/apache/stanbol/entityhub/indexing/source/sesame/
    stanbol/trunk/entityhub/indexing/source/sesame/src/test/resources/
    stanbol/trunk/entityhub/indexing/source/sesame/src/test/resources/log4j.properties

Propchange: stanbol/trunk/entityhub/indexing/source/sesame/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov  4 14:24:56 2013
@@ -0,0 +1 @@
+

Added: stanbol/trunk/entityhub/indexing/source/sesame/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/source/sesame/pom.xml?rev=1538622&view=auto
==============================================================================
--- stanbol/trunk/entityhub/indexing/source/sesame/pom.xml (added)
+++ stanbol/trunk/entityhub/indexing/source/sesame/pom.xml Mon Nov  4 14:24:56 2013
@@ -0,0 +1,149 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.stanbol</groupId>
+    <artifactId>apache-stanbol-entityhub-indexing</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+    <relativePath>../..</relativePath>
+  </parent>
+
+  <artifactId>org.apache.stanbol.entityhub.indexing.source.sesame</artifactId>
+  <packaging>bundle</packaging>
+
+  <name>Apache Stanbol Entityhub Indexing Source for sesame</name>
+  <description>
+    Provides support for indexing RDF data by using any Sesame based RDF triple
+    Store
+  </description>
+  
+  <licenses>
+    <license>
+      <name>Apache Software License, Version 2.0</name>
+      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+      <distribution>repo</distribution>
+      <comments>A business-friendly OSS license</comments>
+    </license>
+  </licenses>
+    
+  <scm>
+    <connection>
+      scm:svn:http://svn.apache.org/repos/asf/stanbol/trunk/entityhub/indexing/source/sesame
+    </connection>
+    <developerConnection>
+      scm:svn:https://svn.apache.org/repos/asf/stanbol/trunk/entityhub/indexing/source/sesame
+    </developerConnection>
+    <url>http://stanbol.apache.org</url>
+  </scm>
+  
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <instructions>
+            <Export-Package>
+              org.apache.stanbol.entityhub.indexing.source.sesame;version=${project.version}
+            </Export-Package>
+          </instructions>
+        </configuration>
+      </plugin>
+      <plugin>
+        <!-- Release audit (license header) check. The excludes list the
+             files that are allowed to come without a license header. -->
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+        <configuration>
+          <excludes>
+            <!-- AL20 licensed -->
+            <exclude>src/license/THIRD-PARTY.properties</exclude>
+
+            <!-- AL20 licensed files. See src/test/resources/README -->
+            <exclude>src/test/**/*.txt</exclude>
+            <exclude>src/test/**/*.nq</exclude>
+            <exclude>src/test/**/*.nt</exclude>
+            <exclude>src/test/**/*.config</exclude>
+          </excludes>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.commons.namespaceprefix.service</artifactId>
+      <version>1.0.0-SNAPSHOT</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.entityhub.indexing.core</artifactId>
+      <version>1.0.0-SNAPSHOT</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.entityhub.model.sesame</artifactId>
+      <version>1.0.0-SNAPSHOT</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.entityhub.ldpath</artifactId>
+      <version>1.0.0-SNAPSHOT</version>
+    </dependency>
+    
+    <dependency> <!-- the sesame repository API -->
+      <groupId>org.openrdf.sesame</groupId>
+      <artifactId>sesame-repository-api</artifactId>
+    </dependency>
+    <dependency> <!-- used to hold the repository config  (provided as RDF graph) -->
+      <groupId>org.openrdf.sesame</groupId>
+      <artifactId>sesame-sail-memory</artifactId>
+    </dependency>
+    <dependency> <!-- SailRepository: wraps the MemoryStore the repository config is loaded into -->
+      <groupId>org.openrdf.sesame</groupId>
+      <artifactId>sesame-repository-sail</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-compress</artifactId>
+    </dependency>
+    <!-- dependencies for testing -->
+    <dependency>  <!-- used for debug level logging during tests -->
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+</project>

Added: stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/AbstractSesameBackend.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/AbstractSesameBackend.java?rev=1538622&view=auto
==============================================================================
--- stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/AbstractSesameBackend.java (added)
+++ stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/AbstractSesameBackend.java Mon Nov  4 14:24:56 2013
@@ -0,0 +1,374 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.entityhub.indexing.source.sesame;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+import java.util.concurrent.ThreadPoolExecutor;
+
+import javax.xml.datatype.XMLGregorianCalendar;
+
+
+import org.openrdf.model.BNode;
+import org.openrdf.model.Literal;
+import org.openrdf.model.Resource;
+import org.openrdf.model.Statement;
+import org.openrdf.model.Value;
+import org.openrdf.model.ValueFactory;
+import org.openrdf.repository.RepositoryConnection;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.repository.RepositoryResult;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import at.newmedialab.ldpath.api.backend.RDFBackend;
+
+/**
+ * Sesame Backend based on the code of 
+ * <code>org.apache.marmotta.ldpath.backend.sesame.AbstractSesameBackend</code>
+ * (module <code>org.apache.marmotta:ldpath-backend-sesame:3.1.0-incubating</code>.
+ * <p>
+ * TODO: as soon as the LDPath dependency is updated to the current 
+ * Marmotta version this should be removed and extend the current Marmotta version
+ *
+ */
+public abstract class AbstractSesameBackend implements RDFBackend<Value> {
+
+    private static final Logger log = LoggerFactory.getLogger(AbstractSesameBackend.class);
+        
+    protected org.openrdf.model.URI createURIInternal(final ValueFactory valueFactory, String uri) {
+        return valueFactory.createURI(uri);
+    }
+
+    protected Literal createLiteralInternal(final ValueFactory valueFactory, String content) {
+        log.debug("creating literal with content \"{}\"",content);
+        return valueFactory.createLiteral(content);
+    }
+
+    protected Literal createLiteralInternal(final ValueFactory valueFactory, String content,
+            Locale language, URI type) {
+        if(log.isDebugEnabled()){
+            log.debug("creating literal with content \"{}\", language {}, datatype {}",
+                new Object[]{content,language,type});
+        }
+        if(language == null && type == null) {
+            return valueFactory.createLiteral(content);
+        } else if(type == null) {
+            return valueFactory.createLiteral(content,language.getLanguage());
+        } else  {
+            return valueFactory.createLiteral(content, valueFactory.createURI(type.toString()));
+        }
+    }
+
+    protected Collection<Value> listObjectsInternal(RepositoryConnection connection, 
+        Resource subject, org.openrdf.model.URI property, boolean includeInferred, 
+        Resource...context)
+            throws RepositoryException {
+        ValueFactory valueFactory = connection.getValueFactory();
+
+        Set<Value> result = new HashSet<Value>();
+        RepositoryResult<Statement> qResult = connection.getStatements(
+            merge(subject, connection.getValueFactory()), 
+            merge(property, connection.getValueFactory()), null, 
+            includeInferred, context);
+        try {
+            while(qResult.hasNext()) {
+                result.add(qResult.next().getObject());
+            }
+        } finally {
+            qResult.close();
+        }
+        return  result;
+    }
+
+    protected Collection<Value> listSubjectsInternal(final RepositoryConnection connection, 
+        org.openrdf.model.URI property, Value object, boolean includeInferred, 
+        Resource...context)
+            throws RepositoryException {
+        Set<Value> result = new HashSet<Value>();
+        RepositoryResult<Statement> qResult = connection.getStatements(null, 
+            merge(property, connection.getValueFactory()), 
+            merge(object, connection.getValueFactory()), includeInferred,
+            context);
+        try {
+            while(qResult.hasNext()) {
+                result.add(qResult.next().getSubject());
+            }
+        } finally {
+            qResult.close();
+        }
+        return  result;
+    }
+
+    /**
+     * Merge the value given as argument into the value factory given as argument
+     * @param value
+     * @param vf
+     * @param <T>
+     * @return
+     */
+    protected <T extends Value> T merge(T value, ValueFactory vf) {
+        if(value instanceof org.openrdf.model.URI) {
+            return (T)vf.createURI(value.stringValue());
+        } else if(value instanceof BNode) {
+            return (T)vf.createBNode(((BNode) value).getID());
+        } else {
+            return value;
+        }
+    }
+
+    @Override
+    public abstract Literal createLiteral(String content);
+
+    @Override
+    public abstract Literal createLiteral(String content, Locale language, URI type);
+
+    @Override
+    public abstract org.openrdf.model.URI createURI(String uri);
+
+    @Override
+    public abstract Collection<Value> listObjects(Value subject, Value property);
+
+    @Override
+    public abstract Collection<Value> listSubjects(Value property, Value object);
+
+    @Override
+    @Deprecated
+    public boolean supportsThreading() {
+        return false;
+    }
+
+    @Override
+    @Deprecated
+    public ThreadPoolExecutor getThreadPool() {
+        return null;
+    }
+    /**
+     * Test whether the node passed as argument is a literal
+     *
+     * @param n the node to check
+     * @return true if the node is a literal
+     */
+    @Override
+    public boolean isLiteral(Value n) {
+        return n instanceof Literal;
+    }
+
+    /**
+     * Test whether the node passed as argument is a URI
+     *
+     * @param n the node to check
+     * @return true if the node is a URI
+     */
+    @Override
+    public boolean isURI(Value n) {
+        return n instanceof org.openrdf.model.URI;
+    }
+
+    /**
+     * Test whether the node passed as argument is a blank node
+     *
+     * @param n the node to check
+     * @return true if the node is a blank node
+     */
+    @Override
+    public boolean isBlank(Value n) {
+        return n instanceof BNode;
+    }
+
+    /**
+     * Return the language of the literal node passed as argument.
+     *
+     * @param n the literal node for which to return the language
+     * @return a Locale representing the language of the literal, or null if the literal node has no language
+     * @throws IllegalArgumentException in case the node is no literal
+     */
+    @Override
+    public Locale getLiteralLanguage(Value n) {
+        try {
+            if(((Literal)n).getLanguage() != null) {
+                return new Locale( ((Literal)n).getLanguage() );
+            } else {
+                return null;
+            }
+        } catch (ClassCastException e) {
+            throw new IllegalArgumentException("Value "+n.stringValue()+" is not a literal" +
+                    "but of type "+debugType(n));
+        }
+    }
+
+    /**
+     * Return the URI of the type of the literal node passed as argument.
+     *
+     * @param n the literal node for which to return the typer
+     * @return a URI representing the type of the literal content, or null if the literal is untyped
+     * @throws IllegalArgumentException in case the node is no literal
+     */
+    @Override
+    public URI getLiteralType(Value n) {
+        try {
+            if(((Literal)n).getDatatype() != null) {
+                try {
+                    return new URI(((Literal)n).getDatatype().stringValue());
+                } catch (URISyntaxException e) {
+                    log.error("literal datatype was not a valid URI: {}",((Literal) n).getDatatype());
+                    return null;
+                }
+            } else {
+                return null;
+            }
+        } catch (ClassCastException e) {
+            throw new IllegalArgumentException("Value "+n.stringValue()+" is not a literal" +
+                    "but of type "+debugType(n));
+        }
+    }
+
+    /**
+     * Return the string value of a node. For a literal, this will be the content, for a URI node it will be the
+     * URI itself, and for a blank node it will be the identifier of the node.
+     *
+     * @param value
+     * @return
+     */
+    @Override
+    public String stringValue(Value value) {
+        return value.stringValue();
+    }
+
+    @Override
+    public BigDecimal decimalValue(Value node) {
+        try {
+            return ((Literal)node).decimalValue();
+        } catch (ClassCastException e) {
+            throw new IllegalArgumentException("Value "+node.stringValue()+" is not a literal" +
+                    "but of type "+debugType(node));
+        }
+    }
+
+    @Override
+    public BigInteger integerValue(Value node) {
+        try {
+            return ((Literal)node).integerValue();
+        } catch (ClassCastException e) {
+            throw new IllegalArgumentException("Value "+node.stringValue()+" is not a literal" +
+                    "but of type "+debugType(node));
+        }
+    }
+
+    @Override
+    public Boolean booleanValue(Value node) {
+        try {
+            return ((Literal)node).booleanValue();
+        } catch (ClassCastException e) {
+            throw new IllegalArgumentException("Value "+node.stringValue()+" is not a literal" +
+                    "but of type "+debugType(node));
+        }
+    }
+
+    @Override
+    public Date dateTimeValue(Value node) {
+        try {
+            XMLGregorianCalendar cal = ((Literal)node).calendarValue();
+            //TODO: check if we need to deal with timezone and Local here
+            return cal.toGregorianCalendar().getTime();
+        } catch (ClassCastException e) {
+            throw new IllegalArgumentException("Value "+node.stringValue()+" is not a literal" +
+                    "but of type "+debugType(node));
+        }
+    }
+
+    @Override
+    public Date dateValue(Value node) {
+        try {
+            XMLGregorianCalendar cal = ((Literal)node).calendarValue();
+            return cal.toGregorianCalendar().getTime();
+        } catch (ClassCastException e) {
+            throw new IllegalArgumentException("Value "+node.stringValue()+" is not a literal" +
+                    "but of type "+debugType(node));
+        }
+    }
+
+    @Override
+    public Date timeValue(Value node) {
+        //TODO: Unless someone knows how to create a Date that only has the time
+        //      from a XMLGregorianCalendar
+        return dateTimeValue(node);
+    }
+
+    @Override
+    public Long longValue(Value node) {
+        try {
+            return ((Literal)node).longValue();
+        } catch (ClassCastException e) {
+            throw new IllegalArgumentException("Value "+node.stringValue()+" is not a literal" +
+                    "but of type "+debugType(node));
+        }
+    }
+
+    @Override
+    public Double doubleValue(Value node) {
+        try {
+            return ((Literal)node).doubleValue();
+        } catch (ClassCastException e) {
+            throw new IllegalArgumentException("Value "+node.stringValue()+" is not a literal" +
+                    "but of type "+debugType(node));
+        }
+    }
+
+    @Override
+    public Float floatValue(Value node) {
+        try {
+            return ((Literal)node).floatValue();
+        } catch (ClassCastException e) {
+            throw new IllegalArgumentException("Value "+node.stringValue()+" is not a literal" +
+                    "but of type "+debugType(node));
+        }
+    }
+
+    @Override
+    public Integer intValue(Value node) {
+        try {
+            return ((Literal)node).intValue();
+        } catch (ClassCastException e) {
+            throw new IllegalArgumentException("Value "+node.stringValue()+" is not a literal" +
+                    "but of type "+debugType(node));
+        }
+    }
+
+
+    /**
+     * Prints the type (URI,bNode,literal) by inspecting the parsed {@link Value}
+     * to improve error messages and other loggings. In case of literals 
+     * also the {@link #getLiteralType(Value) literal type} is printed
+     * @param value the value or <code>null</code> 
+     * @return the type as string.
+     */
+    protected String debugType(Value value) {
+        return value == null ? "null":isURI(value)?"URI":isBlank(value)?"bNode":
+                "literal ("+getLiteralType(value)+")";
+    }
+
+
+}

Added: stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/RdfIndexingSource.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/RdfIndexingSource.java?rev=1538622&view=auto
==============================================================================
--- stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/RdfIndexingSource.java (added)
+++ stanbol/trunk/entityhub/indexing/source/sesame/src/main/java/org/apache/stanbol/entityhub/indexing/source/sesame/RdfIndexingSource.java Mon Nov  4 14:24:56 2013
@@ -0,0 +1,541 @@
+package org.apache.stanbol.entityhub.indexing.source.sesame;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.URI;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.apache.stanbol.entityhub.indexing.core.EntityDataIterable;
+import org.apache.stanbol.entityhub.indexing.core.EntityDataIterator;
+import org.apache.stanbol.entityhub.indexing.core.EntityDataProvider;
+import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
+import org.apache.stanbol.entityhub.model.sesame.RdfRepresentation;
+import org.apache.stanbol.entityhub.model.sesame.RdfValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.openrdf.model.BNode;
+import org.openrdf.model.Graph;
+import org.openrdf.model.Literal;
+import org.openrdf.model.Model;
+import org.openrdf.model.Resource;
+import org.openrdf.model.Statement;
+import org.openrdf.model.Value;
+import org.openrdf.model.ValueFactory;
+import org.openrdf.model.impl.TreeModel;
+import org.openrdf.model.util.ModelUtil;
+import org.openrdf.repository.Repository;
+import org.openrdf.repository.RepositoryConnection;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.repository.RepositoryResult;
+import org.openrdf.repository.config.RepositoryConfig;
+import org.openrdf.repository.config.RepositoryConfigException;
+import org.openrdf.repository.config.RepositoryConfigUtil;
+import org.openrdf.repository.config.RepositoryFactory;
+import org.openrdf.repository.config.RepositoryRegistry;
+import org.openrdf.repository.sail.SailRepository;
+import org.openrdf.rio.RDFFormat;
+import org.openrdf.rio.RDFParseException;
+import org.openrdf.rio.RDFParser;
+import org.openrdf.rio.Rio;
+import org.openrdf.sail.SailConnection;
+import org.openrdf.sail.memory.MemoryStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import at.newmedialab.ldpath.api.backend.RDFBackend;
+
+public class RdfIndexingSource extends AbstractSesameBackend implements EntityDataIterable, EntityDataProvider, RDFBackend<Value> {
+
+    private final Logger log = LoggerFactory.getLogger(RdfIndexingSource.class);
+    
+    public static final String PARAM_REPOSITORY_CONFIG = "config";
+    
+    public static final String DEFAULT_REPOSITORY_CONFIG = "repository.ttl";
+    
+    protected ValueFactory sesameFactory;
+    
+    protected RdfValueFactory vf;
+    
+    Repository repository;
+    //protected RepositoryConnection connection;
+
+    
+    
+    /**
+     * Whether {@link BNode}s that are values of outgoing triples should be followed.
+     */
+    protected boolean followBNodeState = true; //TODO: make configurable
+
+    private Resource[] contexts = new Resource[]{}; //TODO: make configurable
+
+    private boolean includeInferred = true; //TODO: make configurable
+    
+    protected RepositoryConfig repoConfig;
+    private RepositoryConnection ldpathConnection;
+    private Lock ldpathConnectionLock = new ReentrantLock();
+    
+    private RepositoryConnection entityDataProviderConnection;
+    private Lock entityDataProviderConnectionLock = new ReentrantLock();
+    /**
+     * {@link EntityDataIterator}s created by {@link #entityDataIterator()}
+     * add themselves to this list while they are active. Calling {@link #close()}
+     * on this indexing source also closes all iterators in this list.
+     */
+    protected final List<EntityDataIterator> entityDataIterators = new CopyOnWriteArrayList<EntityDataIterator>();
+    
+    /**
+     * Reads the name of the Sesame repository configuration file from the
+     * parsed configuration (parameter {@link #PARAM_REPOSITORY_CONFIG},
+     * default {@link #DEFAULT_REPOSITORY_CONFIG}), resolves it relative to
+     * the config folder of the {@link IndexingConfig} and loads the
+     * repository configuration from it.
+     *
+     * @param config the configuration of this indexing source
+     * @throws IllegalArgumentException if the parsed configuration does not
+     * contain the {@link IndexingConfig} or if the repository configuration
+     * file does not exist
+     */
+    @Override
+    public void setConfiguration(Map<String,Object> config) {
+        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
+        if(indexingConfig == null){
+            throw new IllegalArgumentException("The parsed configuration does not contain "
+                + "the required IndexingConfig (key: '"+IndexingConfig.KEY_INDEXING_CONFIG+"')!");
+        }
+        Object value = config.get(PARAM_REPOSITORY_CONFIG);
+        String repoConfigName = value != null ? value.toString() : DEFAULT_REPOSITORY_CONFIG;
+        File repoConfigFile = new File(indexingConfig.getConfigFolder(),repoConfigName);
+        if(!repoConfigFile.isFile()){
+            throw new IllegalArgumentException("The configured Sesame Repository configuration file '"
+                + repoConfigFile +"' is missing. Please use the '"+PARAM_REPOSITORY_CONFIG
+                + "' parameter to configure the actual configuration file (relative "
+                + "to the config folder '"+indexingConfig.getConfigFolder()+"')");
+        }
+        //read the config (an RDF file)
+        this.repoConfig = loadRepositoryConfig(repoConfigFile);
+    }
+
+    /**
+     * Loads the single Sesame repository configuration contained in the
+     * parsed RDF file. The file is parsed into a temporary in-memory
+     * repository from which the {@link RepositoryConfig} is read and validated.
+     *
+     * @param repoConfigFile the RDF file holding the repository configuration.
+     * The RDF format is determined based on the file name.
+     * @return the validated repository configuration
+     * @throws IllegalArgumentException if the RDF format can not be determined,
+     * the file can not be read or parsed, or if it does not contain exactly one
+     * valid repository configuration
+     * @throws IllegalStateException on errors of the temporary in-memory repository
+     */
+    private RepositoryConfig loadRepositoryConfig(File repoConfigFile) {
+        RDFFormat format = Rio.getParserFormatForFileName(repoConfigFile.getName());
+        if(format == null){ //unknown file extension
+            throw new IllegalArgumentException("Unable to determine the RDF format of '"
+                + repoConfigFile + "' based on its file name!");
+        }
+        Repository configRepo = new SailRepository(new MemoryStore());
+        RepositoryConfig loadedConfig;
+        try {
+            //(1) parse the file into the in-memory repository
+            RepositoryConnection con = null;
+            try {
+                //a Repository needs to be initialised before it is used
+                configRepo.initialize();
+                con = configRepo.getConnection();
+                FileInputStream in = null;
+                try {
+                    in = new FileInputStream(repoConfigFile);
+                    con.add(new InputStreamReader(in,Charset.forName("UTF-8")),
+                        null, format);
+                } catch (RDFParseException e) {
+                    throw new IllegalArgumentException("Unable to parse '"
+                        + repoConfigFile+ "' using RDF format '"+ format +"'!", e);
+                } catch (IOException e) {
+                    throw new IllegalArgumentException("Unable to access '"
+                            + repoConfigFile+ "'!", e);
+                } finally {
+                    //do not leak the file handle
+                    if(in != null){
+                        try {
+                            in.close();
+                        } catch (IOException e) {/* ignore */}
+                    }
+                }
+                con.commit();
+            } catch (RepositoryException e) {
+                throw new IllegalStateException("Unable to load '"
+                        + repoConfigFile+ "' to inmemory Sail!", e);
+            } finally {
+                if(con != null){
+                    try {
+                        con.close();
+                    } catch (RepositoryException e) {/* ignore */}
+                }
+            }
+            //(2) read and validate the (single) repository configuration
+            try {
+                Set<String> repoNames = RepositoryConfigUtil.getRepositoryIDs(configRepo);
+                if(repoNames.size() > 1){
+                    throw new IllegalArgumentException("Repository configuration file '"
+                        +repoConfigFile+"' MUST only contain a single repository configuration!");
+                }
+                if(repoNames.isEmpty()){
+                    throw new IllegalArgumentException("Repository configuration file '"
+                            +repoConfigFile+"' DOES NOT contain a repository configuration!");
+                }
+                loadedConfig = RepositoryConfigUtil.getRepositoryConfig(configRepo, repoNames.iterator().next());
+                loadedConfig.validate();
+            } catch (RepositoryException e) {
+                throw new IllegalStateException("Unable to read RepositoryConfiguration from the "
+                    + "in-memory Sail!",e);
+            } catch (RepositoryConfigException e) {
+                throw new IllegalArgumentException("Repository Configuration in '"
+                    + repoConfigFile + "' is not valid!",e);
+            }
+        } finally {
+            //the temporary repository is no longer needed (also in case of errors)
+            try {
+                configRepo.shutDown();
+            } catch (RepositoryException e) { /* ignore */ }
+        }
+        if(loadedConfig.getRepositoryImplConfig() == null){
+            throw new IllegalArgumentException("Missing RepositoryImpl config for "
+                + "config "+loadedConfig.getID()+" of file "+repoConfigFile+"!");
+        }
+        return loadedConfig;
+    }
+
+    @Override
+    public boolean needsInitialisation() {
+        // always true: initialise() has to create the Sesame Repository
+        // based on the loaded repository configuration
+        return true;
+    }
+
+    /**
+     * Creates the Sesame {@link Repository} for the repository configuration
+     * loaded by {@link #setConfiguration(Map)} by using the
+     * {@link RepositoryFactory} registered for the configured implementation
+     * type.
+     *
+     * @throws IllegalStateException if no repository configuration is
+     * present, no {@link RepositoryFactory} is registered for the configured
+     * type or the factory is unable to create the repository
+     */
+    @Override
+    public void initialise() {
+        if(repoConfig == null){
+            throw new IllegalStateException("No repository configuration present. "
+                + "setConfiguration(..) MUST BE called before initialise()!");
+        }
+        // TODO create the Sesame Connection
+        // NOTE(review): the created repository is not initialised here and
+        // 'sesameFactory' is not assigned in this method - confirm that this
+        // happens elsewhere before the repository is used.
+        String implType = repoConfig.getRepositoryImplConfig().getType();
+        RepositoryFactory factory = RepositoryRegistry.getInstance().get(implType);
+        if(factory == null){
+            throw new IllegalStateException("Unable to initialise Repository (id: "
+                + repoConfig.getID()+ ", title: "+repoConfig.getTitle() + ", impl: "
+                + implType+") because no "
+                + "RepositoryFactory is present for the specified implementation!");
+        }
+        try {
+            repository = factory.getRepository(repoConfig.getRepositoryImplConfig());
+        } catch (RepositoryConfigException e) {
+            throw new IllegalStateException("Unable to initialise Repository (id: "
+                + repoConfig.getID()+ ", title: "+repoConfig.getTitle() + ", impl: "
+                + implType+")!", e);
+        }
+    }
+
+    /**
+     * Releases all resources held by this source: first all still open
+     * {@link EntityDataIterator}s, then the connections used for LDPath and
+     * for the entity data provider and finally the repository itself.
+     * Errors while shutting down the repository are logged and not re-thrown.
+     */
+    @Override
+    public void close() {
+        //first close still active RdfEntityDataIterator instances. Iterate
+        //over a snapshot because RdfEntityDataIterator#close() removes the
+        //iterator from entityDataIterators while we are looping
+        for(EntityDataIterator edi : 
+                new java.util.ArrayList<EntityDataIterator>(entityDataIterators)){
+            edi.close();
+        }
+        //close connections used for LDPath and EntityDataProvider
+        ungetLdPathConnection();
+        ungetEntityDataProviderConnection();
+        //finally shutdown the repository (null if initialise() was never
+        //called or failed before the repository was created)
+        if(repository != null){
+            try {
+                repository.shutDown();
+            } catch (RepositoryException e) {
+                log.warn("Error while closing Sesame Connection", e);
+            }
+        }
+    }
+
+    /**
+     * Looks up the data of a single entity by using the connection returned
+     * by {@link #getEntityDataProviderConnection()}.
+     * @param id the id of the entity. Converted to an URI by the
+     * <code>sesameFactory</code>
+     * @return the Representation created by
+     * {@link #createRepresentationGraph(RepositoryConnection, org.openrdf.model.URI)}
+     * @throws IllegalStateException on any {@link RepositoryException}. In
+     * this case the entity data provider connection is released.
+     */
+    @Override
+    public Representation getEntityData(String id) {
+        try {
+            final RepositoryConnection providerConnection = 
+                    getEntityDataProviderConnection();
+            final org.openrdf.model.URI entityUri = sesameFactory.createURI(id);
+            return createRepresentationGraph(providerConnection, entityUri);
+        } catch (RepositoryException e) {
+            //release the connection so that the next call obtains a new one
+            ungetEntityDataProviderConnection();
+            throw new IllegalStateException("Unable to create Representation '"
+                    + id + "'!", e);
+        }
+    }
+
+    /**
+     * Creates a new {@link RdfEntityDataIterator} configured with the
+     * <code>followBNodeState</code>, <code>includeInferred</code> and
+     * <code>contexts</code> of this source.
+     * @return the iterator
+     * @throws IllegalStateException if the iterator can not be created
+     * because of a {@link RepositoryException}
+     */
+    @Override
+    public EntityDataIterator entityDataIterator() {
+        try {
+            return new RdfEntityDataIterator(followBNodeState, includeInferred, contexts);
+        } catch (RepositoryException e) {
+            //NOTE: the original message missed a space after "for" and the
+            //opening quote in front of the repository id
+            throw new IllegalStateException("Unable to create EntityDataIterator for "
+                    + "Sesame Repository '"+ repoConfig.getID() + "'!", e);
+        }
+    }
+
+    /**
+     * {@link EntityDataIterator} that reads all {@link Statement}s of the
+     * repository over an own {@link RepositoryConnection} and creates one
+     * Representation for every run of consecutive Statements that share the
+     * same URI subject. Statements with a subject that is not an URI do not
+     * start an entity and are skipped.<p>
+     * NOTE(review): this relies on the repository returning the Statements
+     * grouped by subject - confirm for the used Sail implementation.
+     */
+    protected class RdfEntityDataIterator implements EntityDataIterator {
+
+        protected final RepositoryConnection connection;
+        protected final RepositoryResult<Statement> stdItr;
+        protected final boolean followBNodes;
+
+        /**
+         * NOTE(review): neither read nor written by the code of this class
+         */
+        private org.openrdf.model.URI currentEntity = null;
+        /**
+         * The last {@link Statement} read from {@link #stdItr}
+         */
+        private Statement currentStd = null;
+        /**
+         * If {@link #currentStd} was read from {@link #stdItr} but was not yet
+         * added to a Representation (nor skipped).
+         */
+        private boolean stdPending = false;
+        /**
+         * The current Representation as created by {@link #next()}
+         */
+        protected RdfRepresentation currentRep;
+        /**
+         * If the {@link #stdItr} is positioned on the 2nd {@link Statement} 
+         * of the next Entity and {@link #currentStd} holds the first one.
+         */
+        private boolean nextInitialised = false;
+        
+        /**
+         * Opens a new connection to the repository and requests all
+         * statements of the parsed contexts. The created instance registers
+         * itself with <code>entityDataIterators</code>.
+         * @param followBNodes if BNode values should be followed
+         * @param includeInferred if inferred statements should be included
+         * @param contexts the contexts to read the statements from
+         * @throws RepositoryException if the connection or the statements can
+         * not be obtained. The connection is closed in the latter case.
+         */
+        protected RdfEntityDataIterator(boolean followBNodes,
+                boolean includeInferred, Resource...contexts) throws RepositoryException{
+            final RepositoryConnection con = repository.getConnection();
+            final RepositoryResult<Statement> statements;
+            try {
+                statements = con.getStatements(null, null, null, includeInferred, contexts);
+            } catch (RepositoryException e) {
+                //do not leak the connection if the statements can not be read
+                try {
+                    con.close();
+                } catch (RepositoryException e1) { /* ignore */ }
+                throw e;
+            }
+            this.connection = con;
+            this.stdItr = statements;
+            this.followBNodes = followBNodes;
+            entityDataIterators.add(this);
+        }
+        
+        /**
+         * Positions the iterator on the first Statement of the next entity by
+         * skipping Statements with subjects that are not URIs.
+         * @return <code>true</code> if an other entity is available
+         */
+        @Override
+        public boolean hasNext() {
+            if(nextInitialised){
+                return true;
+            }
+            try {
+                while(!nextInitialised){
+                    if(!stdPending){ //read the next statement
+                        if(!stdItr.hasNext()){
+                            return false; //no more data
+                        }
+                        currentStd = stdItr.next();
+                        stdPending = true;
+                    }
+                    if(currentStd.getSubject() instanceof org.openrdf.model.URI){
+                        nextInitialised = true;
+                    } else { //subject is not an URI ... skip this statement
+                        stdPending = false;
+                    }
+                }
+                return true;
+            } catch (RepositoryException e) {
+                throw new IllegalArgumentException("Exceptions while reading "
+                        + "Statements after " + currentStd ,e);
+            }
+        }
+
+        /**
+         * Creates the Representation for the next entity.
+         * @return the id (URI) of the next entity
+         * @throws NoSuchElementException if no more entities are available
+         * @throws IllegalStateException if reading the statements of the
+         * entity fails with a {@link RepositoryException}
+         */
+        @Override
+        public String next() {
+            if(!hasNext()){
+                currentRep = null;
+                throw new NoSuchElementException();
+            }
+            final org.openrdf.model.URI subject = 
+                    (org.openrdf.model.URI)currentStd.getSubject();
+            //reset the state before consuming the statements so that a
+            //failure does not cause a partly consumed entity to be replayed
+            nextInitialised = false;
+            currentRep = vf.createRdfRepresentation(subject);
+            try {
+                createRepresentation(subject, currentRep.getModel());
+            } catch (RepositoryException e) {
+                currentRep = null;
+                throw new IllegalStateException("Unable to read statements "
+                    + "for Entity " + subject +"!",e);
+            }
+            return subject.toString();
+        }
+
+        /**
+         * Creates a representation by consuming Statements from the
+         * {@link #stdItr} until the subject changes. The pending Statement
+         * read by {@link #hasNext()} is included if it has the parsed subject.
+         * If {@link #followBNodes} is enabled it also recursively includes
+         * statements where the object is an {@link BNode}.
+         * @param subject the subject of the Representation to create
+         * @param model the model to add the Statements
+         * @throws RepositoryException
+         */
+        protected void createRepresentation(org.openrdf.model.URI subject, final Model model)
+                throws RepositoryException {
+            final Set<BNode> bnodes = followBNodes ? new HashSet<BNode>() : null;
+            //the first Statement of the entity was already read by hasNext()
+            if(stdPending && subject.equals(currentStd.getSubject())){
+                addStatement(currentStd, model, bnodes);
+                stdPending = false;
+            }
+            while(!stdPending && stdItr.hasNext()){
+                currentStd = stdItr.next();
+                if(subject.equals(currentStd.getSubject())){
+                    addStatement(currentStd, model, bnodes);
+                } else { //the subject has changed ... keep the statement for
+                    stdPending = true; //hasNext() and stop here
+                }
+            }
+            if(bnodes != null){ //process BNodes
+                //mark all directly referenced BNodes as visited so that none
+                //of them is extracted a 2nd time via an other BNode
+                final Set<BNode> visited = new HashSet<BNode>(bnodes);
+                for(BNode bnode : bnodes){
+                    extractRepresentation(connection, model, bnode, visited);
+                }
+            }
+        }
+
+        /**
+         * Adds the statement to the model and collects its object in the
+         * parsed set if it is a {@link BNode}.
+         * @param std the statement to add
+         * @param model the model to add the statement to
+         * @param bnodes the set used to collect referenced BNodes or
+         * <code>null</code> if BNodes are not followed
+         */
+        private void addStatement(Statement std, Model model, Set<BNode> bnodes){
+            model.add(std);
+            if(bnodes != null){ //keep referenced BNodes
+                Value object = std.getObject();
+                if(object instanceof BNode){
+                    bnodes.add((BNode)object);
+                }
+            } //else do not follow BNode values
+        }
+
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException("read-only iterator!");
+        }
+
+        /**
+         * Getter for the Representation created by the last call to
+         * {@link #next()}
+         * @throws NoSuchElementException if no current Representation exists
+         */
+        @Override
+        public Representation getRepresentation() {
+            if(currentRep == null){
+                throw new NoSuchElementException();
+            } else {
+                return currentRep;
+            }
+        }
+
+        /**
+         * Unregisters this iterator and closes the statement iterator as well
+         * as the connection. Exceptions while closing are ignored.
+         */
+        @Override
+        public void close() {
+            entityDataIterators.remove(this);
+            try {
+                stdItr.close();
+            } catch (RepositoryException e) { /* ignore */ }
+            try {
+                connection.close();
+            } catch (RepositoryException e) { /* ignore */ }
+        }
+        
+    }
+    
+    /**
+     * Extracts the triples that belong to the {@link Representation} with the
+     * parsed id from the Sesame repository.
+     * @param con the repository connection
+     * @param uri the subject of the Representation to extract
+     * @return the representation with the extracted data.
+     * @throws RepositoryException 
+     */
+    protected RdfRepresentation createRepresentationGraph(RepositoryConnection con, 
+            org.openrdf.model.URI uri) throws RepositoryException{
+        RdfRepresentation rep = vf.createRdfRepresentation(uri);
+        Model model = rep.getModel();
+        extractRepresentation(con, model, uri, 
+            followBNodeState ? new HashSet<BNode>() : null);
+        return rep;
+    }
+    
+    /**
+     * Extracts all {@link Statement}s part of the Representation. If
+     * {@link #followBNodeState} this is called recursively for {@link Statement}s
+     * where the value is an {@link BNode}.
+     */
+    protected void extractRepresentation(RepositoryConnection con,Model model, Resource node, Set<BNode> visited) throws RepositoryException{
+        //we need all the outgoing relations and also want to follow bNodes until
+        //the next UriRef. However we are not interested in incoming relations!
+        RepositoryResult<Statement> outgoing = con.getStatements(node, null, null, includeInferred, contexts);
+        Statement statement;
+        Set<BNode> bnodes = followBNodeState ? new HashSet<BNode>() : null;
+        while(outgoing.hasNext()){
+            statement = outgoing.next();
+            model.add(statement);
+            if(followBNodeState){
+                Value object = statement.getObject();
+                if(object instanceof BNode && !visited.contains(object)){
+                    bnodes.add((BNode)object);
+                }
+            } //else do not follow values beeing BNodes
+        }
+        outgoing.close();
+        if(followBNodeState){
+            for(BNode bnode : bnodes){
+                visited.add(bnode);
+                //TODO: recursive calls could cause stackoverflows with wired graphs
+                extractRepresentation(con, model, bnode, visited);
+            }
+        }
+    }
+    
+    /* -------------------------------------------------------------------------
+     * LDPath Backend methods
+     * -------------------------------------------------------------------------
+     */
+    
+    /**
+     * Creates a literal for the parsed content by delegating to
+     * <code>createLiteralInternal(..)</code> with the <code>sesameFactory</code>.
+     * @param content the content of the literal
+     * @return the created literal
+     */
+    @Override
+    public Literal createLiteral(String content) {
+        final Literal literal = createLiteralInternal(sesameFactory, content);
+        return literal;
+    }
+
+    /**
+     * Creates a literal for the parsed content, language and type by
+     * delegating to <code>createLiteralInternal(..)</code> with the
+     * <code>sesameFactory</code>.
+     * @param content the content of the literal
+     * @param language the language passed on to the internal method
+     * @param type the type passed on to the internal method
+     * @return the created literal
+     */
+    @Override
+    public Literal createLiteral(String content, Locale language, URI type) {
+        final Literal literal = createLiteralInternal(
+            sesameFactory, content, language, type);
+        return literal;
+    }
+
+    /**
+     * Creates an URI for the parsed string by delegating to
+     * <code>createURIInternal(..)</code> with the <code>sesameFactory</code>.
+     * @param uri the string representation of the URI
+     * @return the created URI
+     */
+    @Override
+    public org.openrdf.model.URI createURI(String uri) {
+        final org.openrdf.model.URI created = createURIInternal(sesameFactory, uri);
+        return created;
+    }
+
+    /**
+     * Lists the objects of all statements with the parsed subject and
+     * property by using the connection returned by
+     * {@link #getLdPathConnection()}.
+     * @param subject the subject. MUST NOT be a Literal
+     * @param property the property. Converted to an URI if it is not one
+     * @return the objects as returned by <code>listObjectsInternal(..)</code>
+     * @throws IllegalStateException if the subject is not a {@link Resource}
+     * or if a {@link RepositoryException} occurs. In the latter case the
+     * LDPath connection is released.
+     */
+    @Override
+    public Collection<Value> listObjects(Value subject, Value property) {
+        //check the type up front instead of catching ClassCastException, as
+        //this would also report unrelated ClassCastExceptions thrown by the
+        //called methods as an invalid subject. NOTE: null is still passed on.
+        if(subject != null && !(subject instanceof Resource)){
+            throw new IllegalStateException("Subject of triple pattern MUST NOT be "
+                    + "a Literal (TriplePattern: "+subject+", "+property+", null)!");
+        }
+        try {
+            return listObjectsInternal(getLdPathConnection(), (Resource)subject, 
+                asUri(property), includeInferred, contexts);
+        } catch (RepositoryException e) {
+            ungetLdPathConnection();
+            throw new IllegalStateException("Exception while accessing values for "
+                    + "TriplePattern: "+subject+", "+property+", null!",e);
+        }
+    }
+
+    /**
+     * Lists the subjects of all statements with the parsed property and
+     * object by using the connection returned by
+     * {@link #getLdPathConnection()}.
+     * @param property the property. Converted to an URI if it is not one
+     * @param object the object
+     * @return the subjects as returned by <code>listSubjectsInternal(..)</code>
+     * @throws IllegalStateException if a {@link RepositoryException} occurs.
+     * In this case the LDPath connection is released.
+     */
+    @Override
+    public Collection<Value> listSubjects(Value property, Value object) {
+        try {
+            final RepositoryConnection ldpathCon = getLdPathConnection();
+            final org.openrdf.model.URI predicate = asUri(property);
+            return listSubjectsInternal(ldpathCon, predicate, object,
+                includeInferred, contexts);
+        } catch (RepositoryException e) {
+            //release the connection so that the next call obtains a new one
+            ungetLdPathConnection();
+            throw new IllegalStateException("Exception while accessing values for "
+                + "TriplePattern: null, "+property+", "+object+"!",e);
+        }
+    }
+
+    /**
+     * Getter for the connection used for LDPath. The connection is created
+     * on the first call and reused until {@link #ungetLdPathConnection()} is
+     * called.
+     * @return the connection
+     * @throws RepositoryException if the connection can not be obtained from
+     * the repository
+     */
+    protected RepositoryConnection getLdPathConnection() throws RepositoryException {
+        //read and (if necessary) create the connection while holding the
+        //lock, so that a concurrent ungetLdPathConnection() can not reset the
+        //field between the check and the return
+        ldpathConnectionLock.lock();
+        try {
+            if(ldpathConnection == null){
+                ldpathConnection = repository.getConnection();
+            }
+            return ldpathConnection;
+        } finally {
+            ldpathConnectionLock.unlock();
+        }
+    }
+
+    /**
+     * Closes the connection used for LDPath (if any) and resets the field so
+     * that the next call to {@link #getLdPathConnection()} creates a new one.
+     * Does nothing if no connection is present. Exceptions while closing the
+     * connection are ignored.
+     */
+    protected void ungetLdPathConnection() {
+        ldpathConnectionLock.lock();
+        try {
+            if(ldpathConnection != null){ //null if never requested
+                try {
+                    ldpathConnection.close();
+                } catch (RepositoryException e1) { /* ignore */
+                    
+                } finally {
+                    //also reset if close failed, so that the broken
+                    //connection is not reused
+                    ldpathConnection = null;
+                }
+            }
+        } finally {
+            ldpathConnectionLock.unlock();
+        }
+    }
+    
+    /**
+     * Getter for the connection used to look up the data of single entities.
+     * The connection is created on the first call and reused until
+     * {@link #ungetEntityDataProviderConnection()} is called.
+     * @return the connection
+     * @throws RepositoryException if the connection can not be obtained from
+     * the repository
+     */
+    protected RepositoryConnection getEntityDataProviderConnection() throws RepositoryException {
+        //read and (if necessary) create the connection while holding the
+        //lock, so that a concurrent ungetEntityDataProviderConnection() can
+        //not reset the field between the check and the return
+        entityDataProviderConnectionLock.lock();
+        try {
+            if(entityDataProviderConnection == null){
+                entityDataProviderConnection = repository.getConnection();
+            }
+            return entityDataProviderConnection;
+        } finally {
+            entityDataProviderConnectionLock.unlock();
+        }
+    }
+
+    /**
+     * Closes the connection used to look up the data of single entities (if
+     * any) and resets the field so that the next call to
+     * {@link #getEntityDataProviderConnection()} creates a new one. Does
+     * nothing if no connection is present. Exceptions while closing the
+     * connection are ignored.
+     */
+    protected void ungetEntityDataProviderConnection() {
+        entityDataProviderConnectionLock.lock();
+        try {
+            if(entityDataProviderConnection != null){ //null if never requested
+                try {
+                    entityDataProviderConnection.close();
+                } catch (RepositoryException e1) { /* ignore */
+                    
+                } finally {
+                    //also reset if close failed, so that the broken
+                    //connection is not reused
+                    entityDataProviderConnection = null;
+                }
+            }
+        } finally {
+            entityDataProviderConnectionLock.unlock();
+        }
+    }
+
+    
+    /**
+     * Returns the parsed value as URI. Values that are already URIs are
+     * casted, for all others a new URI is created from their string value.
+     * @param property the value to convert
+     * @return the URI
+     */
+    private org.openrdf.model.URI asUri(Value property){
+        return property instanceof org.openrdf.model.URI
+            ? (org.openrdf.model.URI)property
+            : createURI(property.stringValue());
+    }
+    
+    
+}

Added: stanbol/trunk/entityhub/indexing/source/sesame/src/test/resources/log4j.properties
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/source/sesame/src/test/resources/log4j.properties?rev=1538622&view=auto
==============================================================================
--- stanbol/trunk/entityhub/indexing/source/sesame/src/test/resources/log4j.properties (added)
+++ stanbol/trunk/entityhub/indexing/source/sesame/src/test/resources/log4j.properties Mon Nov  4 14:24:56 2013
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Root logger option
+log4j.rootLogger=INFO, stdout
+ 
+# Direct log messages to stdout
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.Target=System.out
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
+log4j.logger.org.apache.stanbol.entityhub.indexing=DEBUG
\ No newline at end of file