You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@marmotta.apache.org by wi...@apache.org on 2013/02/19 13:52:00 UTC

[32/52] [partial] code contribution, initial import of relevant modules of LMF-3.0.0-SNAPSHOT based on revision 4bf944319368 of the default branch at https://code.google.com/p/lmf/

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/.settings/org.eclipse.jdt.core.prefs
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/.settings/org.eclipse.jdt.core.prefs b/ldclient/ldclient-core/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..69c31cd
--- /dev/null
+++ b/ldclient/ldclient-core/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,8 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
+org.eclipse.jdt.core.compiler.compliance=1.6
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.6

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/.settings/org.eclipse.m2e.core.prefs
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/.settings/org.eclipse.m2e.core.prefs b/ldclient/ldclient-core/.settings/org.eclipse.m2e.core.prefs
new file mode 100644
index 0000000..f897a7f
--- /dev/null
+++ b/ldclient/ldclient-core/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/.settings/org.eclipse.wst.common.component
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/.settings/org.eclipse.wst.common.component b/ldclient/ldclient-core/.settings/org.eclipse.wst.common.component
new file mode 100644
index 0000000..adae079
--- /dev/null
+++ b/ldclient/ldclient-core/.settings/org.eclipse.wst.common.component
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project-modules id="moduleCoreId" project-version="1.5.0">
+    <wb-module deploy-name="ldclient-core">
+        <wb-resource deploy-path="/" source-path="/src/main/java"/>
+        <wb-resource deploy-path="/" source-path="/src/main/resources"/>
+    </wb-module>
+</project-modules>

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/.settings/org.eclipse.wst.common.project.facet.core.xml
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/.settings/org.eclipse.wst.common.project.facet.core.xml b/ldclient/ldclient-core/.settings/org.eclipse.wst.common.project.facet.core.xml
new file mode 100644
index 0000000..c78d932
--- /dev/null
+++ b/ldclient/ldclient-core/.settings/org.eclipse.wst.common.project.facet.core.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<faceted-project>
+  <installed facet="java" version="1.6"/>
+  <installed facet="jst.utility" version="1.0"/>
+</faceted-project>

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/pom.xml
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/pom.xml b/ldclient/ldclient-core/pom.xml
new file mode 100644
index 0000000..f6a6133
--- /dev/null
+++ b/ldclient/ldclient-core/pom.xml
@@ -0,0 +1,124 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Copyright (c) 2013 Salzburg Research.
+  ~
+  ~  Licensed under the Apache License, Version 2.0 (the "License");
+  ~  you may not use this file except in compliance with the License.
+  ~  You may obtain a copy of the License at
+  ~
+  ~      http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~  Unless required by applicable law or agreed to in writing, software
+  ~  distributed under the License is distributed on an "AS IS" BASIS,
+  ~  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~  See the License for the specific language governing permissions and
+  ~  limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>at.newmedialab.lmf</groupId>
+        <artifactId>ldclient-parent</artifactId>
+        <version>3.0.0-SNAPSHOT</version>
+        <relativePath>../</relativePath>
+    </parent>
+
+    <artifactId>ldclient-core</artifactId>
+    <name>LDClient: Core Library</name>
+
+    <description>
+        Linked Data Client Core Library, provides the basic resource access functionality, including
+        HTTP connection handling and content negotiation support.
+    </description>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <version>2.4</version>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>test-jar</goal>
+                        </goals>
+                        <configuration>
+                            <includes>
+                                <include>**/test/helper/**</include>
+                            </includes>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+
+    <dependencies>
+        <dependency>
+            <groupId>at.newmedialab.lmf</groupId>
+            <artifactId>ldclient-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.openrdf.sesame</groupId>
+            <artifactId>sesame-model</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.openrdf.sesame</groupId>
+            <artifactId>sesame-repository-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>at.newmedialab.sesame</groupId>
+            <artifactId>sesame-commons</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>commons-lang</groupId>
+            <artifactId>commons-lang</artifactId>
+        </dependency>
+
+        <!-- logging -->
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-ext</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>jcl-over-slf4j</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>log4j-over-slf4j</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>jul-to-slf4j</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient-cache</artifactId>
+        </dependency>
+
+
+
+        <dependency>
+        	<groupId>junit</groupId>
+        	<artifactId>junit</artifactId>
+        	<scope>test</scope>
+        </dependency>
+        <dependency>
+        	<groupId>org.slf4j</groupId>
+        	<artifactId>slf4j-simple</artifactId>
+        	<version>1.7.2</version>
+        	<scope>test</scope>
+        </dependency>
+    </dependencies>
+</project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/ldclient/LDClient.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/ldclient/LDClient.java b/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/ldclient/LDClient.java
new file mode 100644
index 0000000..64d7426
--- /dev/null
+++ b/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/ldclient/LDClient.java
@@ -0,0 +1,413 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.services.ldclient;
+
+import org.apache.commons.lang.NotImplementedException;
+import org.apache.http.Header;
+import org.apache.http.HttpRequest;
+import org.apache.http.HttpResponse;
+import org.apache.http.HttpStatus;
+import org.apache.http.ProtocolException;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.HttpRequestRetryHandler;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpHead;
+import org.apache.http.client.params.ClientPNames;
+import org.apache.http.conn.ClientConnectionManager;
+import org.apache.http.conn.scheme.PlainSocketFactory;
+import org.apache.http.conn.scheme.Scheme;
+import org.apache.http.conn.scheme.SchemeRegistry;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.impl.client.DefaultRedirectStrategy;
+import org.apache.http.impl.conn.PoolingClientConnectionManager;
+import org.apache.http.params.BasicHttpParams;
+import org.apache.http.params.CoreConnectionPNames;
+import org.apache.http.params.CoreProtocolPNames;
+import org.apache.http.params.HttpParams;
+import org.apache.http.protocol.HttpContext;
+import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
+import org.apache.marmotta.ldclient.api.ldclient.LDClientService;
+import org.apache.marmotta.ldclient.api.provider.DataProvider;
+import org.apache.marmotta.ldclient.exception.DataRetrievalException;
+import org.apache.marmotta.ldclient.model.ClientConfiguration;
+import org.apache.marmotta.ldclient.model.ClientResponse;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ServiceLoader;
+import java.util.Set;
+import java.util.concurrent.Semaphore;
+import java.util.concurrent.TimeUnit;
+
+import javax.net.ssl.SSLEngineResult.Status;
+
+/**
+ * Default implementation of the {@link LDClientService}. For a given resource URI it looks up the
+ * first {@link Endpoint} that handles the URI and delegates the retrieval to the
+ * {@link DataProvider} whose name matches the endpoint type.
+ * <p/>
+ * Endpoints and data providers registered on the classpath are discovered through the Java
+ * {@link ServiceLoader}; endpoints contained in the {@link ClientConfiguration} are added to them.
+ * Unless the configuration supplies its own {@link HttpClient}, the client creates and manages a
+ * pooled HTTP client together with a daemon thread that evicts expired and idle connections;
+ * call {@link #shutdown()} to release these resources.
+ * <p/>
+ * User: sschaffe
+ */
+public final class LDClient implements LDClientService {
+
+    private static final Logger log = LoggerFactory.getLogger(LDClient.class);
+
+    /**
+     * A Java service loader loading all implementations of data providers registered on the classpath.
+     */
+    private static final ServiceLoader<DataProvider> providers = ServiceLoader.load(DataProvider.class);
+
+    /**
+     * A Java service loader loading all auto-registered endpoint configurations on the classpath.
+     */
+    private static final ServiceLoader<Endpoint> defaultEndpoints = ServiceLoader.load(Endpoint.class);
+
+    /** HTTP client used for all requests; either taken from the configuration or created here. */
+    private final HttpClient client;
+
+    /** Eviction thread for the self-managed connection pool; null if the HttpClient was provided. */
+    private final IdleConnectionMonitorThread idleConnectionMonitorThread;
+
+    // limit maximum parallel retrievals of resources
+    private final Semaphore retrievalSemaphore;
+
+    private final ClientConfiguration config;
+
+    /** Service-loaded endpoints plus configured endpoints, sorted by their natural order. */
+    private final List<Endpoint> endpoints;
+
+    /**
+     * Create a Linked Data client using a default {@link ClientConfiguration}.
+     */
+    public LDClient() {
+        this(new ClientConfiguration());
+    }
+
+    /**
+     * Create a Linked Data client using the given configuration. If the configuration does not
+     * provide an HttpClient, a pooling client is created that only registers the "http" scheme,
+     * follows at most 3 redirects, never retries failed requests and allows 20 connections in
+     * total (10 per route).
+     *
+     * @param config the client configuration (endpoints, timeouts, parallel request limit, ...)
+     */
+    public LDClient(ClientConfiguration config) {
+        log.info("Initialising Linked Data Client Service ...");
+
+        this.config = config;
+
+        endpoints = new ArrayList<Endpoint>();
+        for(Endpoint endpoint : defaultEndpoints) {
+            endpoints.add(endpoint);
+        }
+        endpoints.addAll(config.getEndpoints());
+
+        Collections.sort(endpoints);
+        if(log.isInfoEnabled()) {
+            for(Endpoint endpoint : endpoints) {
+                log.info("- LDClient Endpoint: {}", endpoint.getName());
+            }
+        }
+
+        retrievalSemaphore = new Semaphore(config.getMaxParallelRequests());
+
+        if (config.getHttpClient() != null) {
+            log.debug("Using HttpClient provided in the configuration");
+            this.client = config.getHttpClient();
+            // the provided client manages its own connections, no eviction thread needed
+            this.idleConnectionMonitorThread = null;
+        } else {
+            log.debug("Creating default HttpClient based on the configuration");
+
+            HttpParams httpParams = new BasicHttpParams();
+            httpParams.setParameter(CoreProtocolPNames.USER_AGENT, "Apache Marmotta LDClient");
+
+            httpParams.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, config.getSocketTimeout());
+            httpParams.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, config.getConnectionTimeout());
+
+            httpParams.setBooleanParameter(ClientPNames.HANDLE_REDIRECTS,true);
+            httpParams.setIntParameter(ClientPNames.MAX_REDIRECTS,3);
+
+            SchemeRegistry schemeRegistry = new SchemeRegistry();
+            schemeRegistry.register(
+                    new Scheme("http", 80, PlainSocketFactory.getSocketFactory()));
+
+            PoolingClientConnectionManager cm = new PoolingClientConnectionManager(schemeRegistry);
+            cm.setMaxTotal(20);
+            cm.setDefaultMaxPerRoute(10);
+
+            DefaultHttpClient defaultClient = new DefaultHttpClient(cm,httpParams);
+            defaultClient.setRedirectStrategy(new LMFRedirectStrategy());
+            defaultClient.setHttpRequestRetryHandler(new LMFHttpRequestRetryHandler());
+
+            this.idleConnectionMonitorThread = new IdleConnectionMonitorThread(defaultClient.getConnectionManager());
+            this.idleConnectionMonitorThread.start();
+
+            this.client = defaultClient;
+        }
+
+        for(DataProvider provider : providers) {
+            log.info("data provider: {}",provider.getName());
+        }
+    }
+
+    /**
+     * Check whether the given resource is reachable by issuing an HTTP HEAD request.
+     *
+     * @param resource the URI of the resource to ping; only "http://" URIs are supported
+     * @return true if the HEAD request was answered with status 200, false if it was answered
+     *         with any other status or if the request failed
+     * @throws NotImplementedException if the resource does not start with "http://"
+     */
+    @Override
+    public boolean ping(String resource) {
+        // crappy implementation only for http
+        // TODO: the intended implementation delegates to the data provider of the matching
+        //       endpoint (provider.ping(resource, this, endpoint)), guarded by the retrieval
+        //       semaphore and the excluded-URI check, analogous to retrieveResource(String).
+        if (!resource.startsWith("http://")) {
+            throw new NotImplementedException("protocol not supportted");
+        }
+
+        try {
+            HttpResponse response = client.execute(new HttpHead(resource));
+            return HttpStatus.SC_OK == response.getStatusLine().getStatusCode();
+        } catch (Exception e) {
+            // keep the cause and the affected resource; e.getMessage() alone may even be null
+            log.error("ping of resource " + resource + " failed", e);
+            return false;
+        }
+    }
+
+
+    /**
+     * Release the resources held by this client. The connection pool and its eviction thread are
+     * only shut down if they were created by this client, i.e. if the configuration does not
+     * provide its own HttpClient.
+     */
+    @Override
+    public void shutdown() {
+        if(config.getHttpClient() == null) {
+            // we manage our own connection pool
+            if (idleConnectionMonitorThread != null) {
+                idleConnectionMonitorThread.shutdown();
+            }
+            client.getConnectionManager().shutdown();
+        }
+    }
+
+
+
+    /**
+     * Retrieve all triples for this resource from the Linked Data Cloud. Retrieval will be carried out
+     * according to the endpoint definition that matches this resource, using the data provider
+     * registered for the endpoint type. At most as many retrievals as permitted by the configured
+     * maximum of parallel requests run at the same time; further callers block until a slot is free.
+     *
+     * @param resource the URI resource for which to retrieve the triples
+     * @return the response of the data provider, or null if the resource is excluded by the
+     *         configuration, if no data provider exists for the endpoint type, or if the calling
+     *         thread was interrupted while waiting for a free retrieval slot
+     * @throws DataRetrievalException        if the data provider fails to retrieve the resource
+     * @throws UnsupportedOperationException if no endpoint definition matches the resource
+     */
+    @Override
+    public ClientResponse retrieveResource(String resource) throws DataRetrievalException {
+        // acquire outside of the try/finally below: if acquire() is interrupted no permit was
+        // obtained, and releasing anyway would permanently increase the number of permits
+        try {
+            retrievalSemaphore.acquire();
+        } catch (InterruptedException e) {
+            log.warn("retrieval of resource was interruped: {}",resource);
+            // restore the interrupt status so that callers can react to the interruption
+            Thread.currentThread().interrupt();
+            return null;
+        }
+
+        try {
+            if(config.isExcludedUri(resource)) {
+                log.error("cannot retrieve a local resource; linked data caching only allowed for remote resources");
+                return null;
+            }
+
+            Endpoint endpoint = getEndpoint(resource);
+            if(endpoint == null) {
+                // TODO: the fallback should at least be a Linked Data handler, so maybe we should merge the ldclient-provider-rdf?
+                // TODO: determine service provider from connection handshaking / MIME type
+                throw new UnsupportedOperationException("not implemented: determine service provider from connection handshaking / MIME type");
+            }
+
+            DataProvider provider = getDataProvider(endpoint);
+            if(provider == null) {
+                log.error("no service provider for type {}",endpoint.getType());
+                return null;
+            }
+
+            return provider.retrieveResource(resource, this, endpoint);
+        } finally {
+            retrievalSemaphore.release();
+        }
+    }
+
+    /**
+     * Get access to the Apache HTTP Client managed by the connection handler to execute
+     * a request.
+     *
+     * @return the HTTP client used by this Linked Data client
+     */
+    @Override
+    public HttpClient getClient() {
+        return client;
+    }
+
+    /**
+     * Get the client configuration used by the connection handler
+     *
+     * @return the configuration this client was created with
+     */
+    @Override
+    public ClientConfiguration getClientConfiguration() {
+        return config;
+    }
+
+
+    /**
+     * Retrieve the endpoint responsible for the resource URI passed as argument. The endpoints are
+     * checked in their sorted order and the first one that handles the resource is returned.
+     *
+     * @param resource the URI of the resource to check.
+     * @return the first endpoint handling the resource, or null if no endpoint matches
+     */
+    @Override
+    public Endpoint getEndpoint(String resource) {
+        for(Endpoint endpoint : endpoints) {
+            if (endpoint.handles(resource)) {
+                return endpoint;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Test whether an endpoint definition for the given url pattern already exists.
+     *
+     * @param urlPattern the URI pattern to look for
+     * @return true if an endpoint with exactly this URI pattern is known to this client
+     */
+    @Override
+    public boolean hasEndpoint(String urlPattern) {
+        for(Endpoint endpoint : endpoints) {
+            if(endpoint.getUriPattern() != null && endpoint.getUriPattern().equals(urlPattern)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Return a collection of all available data providers (i.e. registered through the service loader).
+     *
+     * @return a new set containing the registered data providers
+     */
+    @Override
+    public Set<DataProvider> getDataProviders() {
+        Set<DataProvider> result = new HashSet<DataProvider>();
+        for(DataProvider p : providers) {
+            result.add(p);
+        }
+        return result;
+    }
+
+
+    /**
+     * Look up the data provider whose name equals the type of the endpoint, ignoring case.
+     *
+     * @param endpoint the endpoint for which to find a provider
+     * @return the matching data provider, or null if none is registered for the endpoint type
+     */
+    private DataProvider getDataProvider(Endpoint endpoint) {
+        for(DataProvider provider : providers) {
+            if(endpoint.getType().equalsIgnoreCase(provider.getName())) {
+                return provider;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Redirect strategy that, in addition to the redirects for GET and HEAD requests, always
+     * follows "303 See Other" and "300 Multiple Choices" responses regardless of the request method.
+     */
+    private static class LMFRedirectStrategy extends DefaultRedirectStrategy {
+        @Override
+        public boolean isRedirected(HttpRequest request, HttpResponse response, HttpContext context) throws ProtocolException {
+            if (response == null) {
+                throw new IllegalArgumentException("HTTP response may not be null");
+            }
+
+            int statusCode = response.getStatusLine().getStatusCode();
+            String method = request.getRequestLine().getMethod();
+            boolean getOrHead = method.equalsIgnoreCase(HttpGet.METHOD_NAME)
+                    || method.equalsIgnoreCase(HttpHead.METHOD_NAME);
+
+            switch (statusCode) {
+                case HttpStatus.SC_MOVED_TEMPORARILY:
+                    // 302 is only followed if the response actually names a target location
+                    return getOrHead && response.getFirstHeader("location") != null;
+                case HttpStatus.SC_MOVED_PERMANENTLY:
+                case HttpStatus.SC_TEMPORARY_REDIRECT:
+                    return getOrHead;
+                case HttpStatus.SC_SEE_OTHER:
+                case HttpStatus.SC_MULTIPLE_CHOICES:
+                    return true;
+                default:
+                    return false;
+            } //end of switch
+        }
+    }
+
+    /**
+     * Retry handler that never retries a failed request.
+     */
+    private static class LMFHttpRequestRetryHandler implements HttpRequestRetryHandler  {
+        /**
+         * Determines if a method should be retried after an IOException
+         * occurs during execution.
+         *
+         * @param exception      the exception that occurred
+         * @param executionCount the number of times this method has been
+         *                       unsuccessfully executed
+         * @param context        the context for the request execution
+         * @return always <code>false</code>, i.e. requests are never retried
+         */
+        @Override
+        public boolean retryRequest(IOException exception, int executionCount, HttpContext context) {
+            return false;
+        }
+    }
+
+    /**
+     * Daemon thread that wakes up every 5 seconds to close expired connections and connections
+     * that have been idle for more than 30 seconds, until {@link #shutdown()} is called.
+     */
+    private static class IdleConnectionMonitorThread extends Thread {
+
+        private final ClientConnectionManager connMgr;
+        private volatile boolean shutdown;
+
+        public IdleConnectionMonitorThread(ClientConnectionManager connMgr) {
+            super("LD HTTP Client Idle Connection Manager");
+            this.connMgr = connMgr;
+            setDaemon(true);
+        }
+
+        @Override
+        public void run() {
+            try {
+                // the flag is checked while holding the monitor, so a notification sent by
+                // shutdown() cannot get lost between the check and the call to wait()
+                synchronized (this) {
+                    while (!shutdown) {
+                        wait(5000);
+                        if (shutdown) {
+                            // woken up for termination: do not touch the connection manager
+                            // any more, its owner is about to shut it down
+                            break;
+                        }
+                        // Close expired connections
+                        connMgr.closeExpiredConnections();
+                        // Optionally, close connections
+                        // that have been idle longer than 30 sec
+                        connMgr.closeIdleConnections(30, TimeUnit.SECONDS);
+                    }
+                }
+            } catch (InterruptedException ex) {
+                // terminate
+            }
+        }
+
+        /**
+         * Ask the monitor thread to terminate and wake it up if it is currently waiting.
+         */
+        public void shutdown() {
+            shutdown = true;
+            synchronized (this) {
+                notifyAll();
+            }
+        }
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/AbstractHttpProvider.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/AbstractHttpProvider.java b/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/AbstractHttpProvider.java
new file mode 100644
index 0000000..c6bd0f8
--- /dev/null
+++ b/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/AbstractHttpProvider.java
@@ -0,0 +1,298 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.services.provider;
+
+import org.apache.http.Header;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.cookie.DateParseException;
+import org.apache.http.impl.cookie.DateUtils;
+import org.apache.http.util.EntityUtils;
+import org.apache.marmotta.commons.collections.CollectionUtils;
+import org.apache.marmotta.commons.http.ContentType;
+import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
+import org.apache.marmotta.ldclient.api.ldclient.LDClientService;
+import org.apache.marmotta.ldclient.api.provider.DataProvider;
+import org.apache.marmotta.ldclient.exception.DataRetrievalException;
+import org.apache.marmotta.ldclient.model.ClientResponse;
+import org.openrdf.repository.Repository;
+import org.openrdf.repository.RepositoryConnection;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.repository.sail.SailRepository;
+import org.openrdf.sail.memory.MemoryStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Queue;
+import java.util.Set;
+
+import static org.apache.marmotta.commons.http.LMFHttpUtils.parseContentType;
+
+/**
+ * Base class for data providers that retrieve the data of a resource through one or more HTTP GET requests.
+ * <p/>
+ * Author: Sebastian Schaffert
+ */
+public abstract class AbstractHttpProvider implements DataProvider {
+
+    private static Logger log = LoggerFactory.getLogger(AbstractHttpProvider.class);
+
+    /**
+     * Build the URL to use to call the webservice in order to retrieve the data for the resource passed as argument.
+     * In many cases, this will just return the URI of the resource (e.g. Linked Data), but there might be data providers
+     * that use different means for accessing the data for a resource, e.g. SPARQL or a Cache.
+     *
+     *
+     *
+     * @param resourceUri
+     * @param endpoint endpoint configuration for the data provider (optional)
+     * @return
+     */
+    protected abstract List<String> buildRequestUrl(String resourceUri, Endpoint endpoint) throws DataRetrievalException;
+
+    /**
+     * Parse the HTTP response entity returned by the web service call and return its contents in a Sesame RDF
+     * repository also passed as argument. The content type returned by the web service is passed as argument to help
+     * the implementation decide how to parse the data. The implementation can return a list of additional pages to
+     * retrieve for completing the data of the resource
+     *
+     *
+     *
+     *
+     * @param resourceUri
+     * @param repository   an RDF repository for storing an RDF representation of the dataset located at the remote resource.
+     * @param in           input stream as returned by the remote webservice
+     * @param contentType  content type as returned in the HTTP headers of the remote webservice
+     * @return a possibly empty list of URLs of additional resources to retrieve to complete the content
+     * @throws java.io.IOException in case an error occurs while reading the input stream
+     */
+    protected abstract List<String> parseResponse(String resourceUri, String requestUrl, Repository repository, InputStream in, String contentType) throws DataRetrievalException;
+
+    /**
+     * Retrieve the data for a resource using the given http client and endpoint definition. The service is
+     * supposed to manage the connection handling itself. See {@link AbstractHttpProvider}
+     * for a generic implementation of this method.
+     *
+     *
+     *
+     * @param resource the resource to be retrieved
+     * @param endpoint the endpoint definition
+     * @return a completely specified client response, including expiry information and the set of triples
+     */
+    @Override
+    public ClientResponse retrieveResource(String resource, LDClientService client, Endpoint endpoint) throws DataRetrievalException {
+
+        try {
+
+            String contentType;
+            if(endpoint != null && endpoint.getContentTypes().size() > 0) {
+                contentType = CollectionUtils.fold(endpoint.getContentTypes(), new CollectionUtils.StringSerializer<ContentType>() {
+                    @Override
+                    public String serialize(ContentType contentType) {
+                        return contentType.toString("q");
+                    }
+                },",");
+            } else {
+                contentType = CollectionUtils.fold(Arrays.asList(listMimeTypes()), ",");
+            }
+
+            long defaultExpires = client.getClientConfiguration().getDefaultExpiry();
+            if(endpoint != null && endpoint.getDefaultExpiry() != null) {
+                defaultExpires = endpoint.getDefaultExpiry();
+            }
+
+            final ResponseHandler handler = new ResponseHandler(resource, endpoint);
+
+            // a queue for queuing the request URLs needed to build the query response
+            Queue<String> requestUrls = new LinkedList<String>();
+            requestUrls.addAll(buildRequestUrl(resource, endpoint));
+
+            Set<String> visited = new HashSet<String>();
+
+            String requestUrl = requestUrls.poll();
+            while(requestUrl != null) {
+
+                if(!visited.contains(requestUrl)) {
+                    HttpGet get = new HttpGet(requestUrl);
+                    try {
+                        get.setHeader("Accept",contentType);
+                        get.setHeader("Accept-Language", "*"); // PoolParty compatibility
+
+                        log.info("retrieving resource data for {} from '{}' endpoint, request URI is <{}>", new Object[]  {resource, getName(), get.getURI().toASCIIString()});
+
+                        handler.requestUrl = requestUrl;
+                        List<String> additionalRequestUrls = client.getClient().execute(get, handler);
+                        requestUrls.addAll(additionalRequestUrls);
+
+                        visited.add(requestUrl);
+                    } finally {
+                        get.releaseConnection();
+                    }
+                }
+
+                requestUrl = requestUrls.poll();
+            }
+
+            Date expiresDate = handler.expiresDate;
+            if (expiresDate == null) {
+                expiresDate = new Date(System.currentTimeMillis() + defaultExpires * 1000);
+            }
+
+            long min_expires = System.currentTimeMillis() + client.getClientConfiguration().getMinimumExpiry() * 1000;
+            if (expiresDate.getTime() < min_expires) {
+                log.info("expiry time returned by request lower than minimum expiration time; using minimum time instead");
+                expiresDate = new Date(min_expires);
+            }
+
+            if(log.isInfoEnabled()) {
+                RepositoryConnection con = handler.triples.getConnection();
+                log.info("retrieved {} triples for resource {}; expiry date: {}",new Object[] {con.size(),resource,expiresDate});
+                con.close();
+            }
+
+            ClientResponse result = new ClientResponse(handler.triples);
+            result.setExpires(expiresDate);
+            return result;
+        } catch (RepositoryException e) {
+            log.error("error while initialising Sesame repository; classpath problem?",e);
+            throw new DataRetrievalException("error while initialising Sesame repository; classpath problem?",e);
+        } catch (ClientProtocolException e) {
+            log.error("HTTP client error while trying to retrieve resource {}: {}", resource, e.getMessage());
+            throw new DataRetrievalException("I/O error while trying to retrieve resource "+resource,e);
+        } catch (IOException e) {
+            log.error("I/O error while trying to retrieve resource {}: {}", resource, e.getMessage());
+            throw new DataRetrievalException("I/O error while trying to retrieve resource "+resource,e);
+        } catch(RuntimeException ex) {
+            log.error("Unknown error while trying to retrieve resource {}: {}", resource, ex.getMessage());
+            throw new DataRetrievalException("Unknown error while trying to retrieve resource "+resource,ex);
+        }
+
+    }
+
+
+
+    /**
+     * Check whether the content type returned by the server is acceptable to the endpoint and data provider
+     */
+    protected boolean isValidContentType(String contentType, Endpoint endpoint) {
+        if(endpoint != null && endpoint.getContentTypes().size() > 0) {
+            ContentType parsed = parseContentType(contentType);
+            for(ContentType valid : endpoint.getContentTypes()) {
+                if(valid.matches(parsed) || valid.matchesWildcard(parsed)) {
+                    return true;
+                }
+            }
+            return false;
+        } else {
+            // TODO: should probably be removed, since it is not used
+            for(String type : listMimeTypes()) {
+                if(type.split(";")[0].equalsIgnoreCase(contentType)) return true;
+            }
+            return false;
+        }
+    }
+
+    private class ResponseHandler implements org.apache.http.client.ResponseHandler<List<String>> {
+
+        private Date             expiresDate;
+
+        private String                requestUrl;
+
+        // the repository where the triples will be stored in case the data providers return them
+        private final Repository triples;
+
+        private final Endpoint   endpoint;
+
+        private final String resource;
+
+        public ResponseHandler(String resource, Endpoint endpoint) throws RepositoryException {
+            this.resource = resource;
+            this.endpoint = endpoint;
+
+            triples = new SailRepository(new MemoryStore());
+            triples.initialize();
+        }
+
+        @Override
+        public List<String> handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
+            ArrayList<String> requestUrls = new ArrayList<String>();
+
+            if (response.getStatusLine().getStatusCode() >= 200 && response.getStatusLine().getStatusCode() < 400) {
+            	final HttpEntity entity = response.getEntity();
+            	if (entity == null)
+            		throw new IOException("no content returned by Linked Data resource " + resource);
+
+	            if (!isValidContentType(entity.getContentType().getValue().split(";")[0], endpoint)) {
+	                // FIXME: here was get.abort()
+	            	throw new IOException("invalid content returned by Linked Data resource " + resource + ": "
+	            			+ entity.getContentType().getValue());
+	            }
+
+
+                if (entity != null) {
+                    String parseContentType = "application/rdf+xml";
+                    if (endpoint != null && "SPARQL".equals(endpoint.getType())) {
+                        parseContentType = "application/sparql-results+xml";
+                    } else if (entity.getContentType() != null) {
+                        parseContentType = entity.getContentType().getValue().split(";")[0];
+                    }
+
+                    InputStream in = entity.getContent();
+                    try {
+
+                        List<String> urls = parseResponse(resource, requestUrl, triples, in, parseContentType);
+                        requestUrls.addAll(urls);
+
+                        if (expiresDate == null) {
+                            Header expires = response.getFirstHeader("Expires");
+                            if (expires != null) {
+                                try {
+                                    expiresDate = DateUtils.parseDate(expires.getValue());
+                                } catch (DateParseException e) {
+                                    log.debug("error parsing Expires: header");
+                                }
+                            }
+                        }
+
+                    } catch (DataRetrievalException e) {
+                        // FIXME: get.abort();
+                        throw new IOException(e);
+                    } finally {
+                        in.close();
+                    }
+                } 
+                EntityUtils.consume(entity);
+            } else {
+                log.error("the HTTP request failed (status: {})", response.getStatusLine());
+                throw new ClientProtocolException("the HTTP request failed (status: " + response.getStatusLine() + ")");
+            }
+
+            return requestUrls;
+        }
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/BlockingProvider.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/BlockingProvider.java b/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/BlockingProvider.java
new file mode 100644
index 0000000..0388985
--- /dev/null
+++ b/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/BlockingProvider.java
@@ -0,0 +1,92 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.services.provider;
+
+import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
+import org.apache.marmotta.ldclient.api.ldclient.LDClientService;
+import org.apache.marmotta.ldclient.api.provider.DataProvider;
+import org.apache.marmotta.ldclient.exception.DataRetrievalException;
+import org.apache.marmotta.ldclient.model.ClientResponse;
+import org.openrdf.repository.Repository;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.repository.sail.SailRepository;
+import org.openrdf.sail.memory.MemoryStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Date;
+
+/**
+ * Data provider that blocks retrieval: it sends no request and always answers with an empty repository.
+ * <p/>
+ * Author: Sebastian Schaffert (sschaffert@apache.org)
+ */
+public class BlockingProvider implements DataProvider {
+
+
+    public static final String PROVIDER_NAME = "NONE";
+
+    private static Logger log = LoggerFactory.getLogger(BlockingProvider.class);
+
+    private static final Repository empty_repository = new SailRepository(new MemoryStore());
+    static {
+        try {
+            empty_repository.initialize();
+        } catch (RepositoryException e) {
+        }
+    }
+
+    /**
+     * Return the name of this data provider. To be used e.g. in the configuration and in log messages.
+     *
+     * @return
+     */
+    @Override
+    public String getName() {
+        return PROVIDER_NAME;
+    }
+
+    /**
+     * Return the list of mime types accepted by this data provider.
+     *
+     * @return
+     */
+    @Override
+    public String[] listMimeTypes() {
+        return new String[0];
+    }
+
+    /**
+     * Retrieve the data for a resource using the given http client and endpoint definition. The service is
+     * supposed to manage the connection handling itself. See AbstractHttpProvider
+     * for a generic implementation of this method.
+     *
+     * @param resource the resource to be retrieved
+     * @param endpoint the endpoint definition
+     * @return a completely specified client response, including expiry information and the set of triples
+     */
+    @Override
+    public ClientResponse retrieveResource(String resource, LDClientService client, Endpoint endpoint) throws DataRetrievalException {
+        log.info("blocked retrieval of resource {}", resource);
+
+        long defaultExpires = client.getClientConfiguration().getDefaultExpiry();
+        Date expiresDate = new Date(System.currentTimeMillis() + defaultExpires * 1000);
+
+        ClientResponse result = new ClientResponse(empty_repository);
+        result.setExpires(expiresDate);
+        return result;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/main/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/main/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider b/ldclient/ldclient-core/src/main/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider
new file mode 100644
index 0000000..b0d1696
--- /dev/null
+++ b/ldclient/ldclient-core/src/main/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider
@@ -0,0 +1 @@
+org.apache.marmotta.ldclient.services.provider.BlockingProvider
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyEndpoint.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyEndpoint.java b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyEndpoint.java
new file mode 100644
index 0000000..caa1b97
--- /dev/null
+++ b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyEndpoint.java
@@ -0,0 +1,12 @@
+package org.apache.marmotta.ldclient.dummy;
+
+import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
+
+/** Endpoint used by the unit tests; it is listed in the Endpoint service file of the test resources. */
+public class DummyEndpoint extends Endpoint {
+	// NOTE(review): the third constructor argument presumably is the URL pattern this endpoint handles -- confirm
+	public DummyEndpoint() {
+		super("Dummy", "Dummy", "^http://127.1.2.3", null, 86400l);
+		setPriority(PRIORITY_HIGH);
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyProvider.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyProvider.java b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyProvider.java
new file mode 100644
index 0000000..074c8d6
--- /dev/null
+++ b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyProvider.java
@@ -0,0 +1,51 @@
+package org.apache.marmotta.ldclient.dummy;
+
+import java.io.IOException;
+
+import org.apache.http.HttpResponse;
+import org.apache.http.StatusLine;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.ResponseHandler;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
+import org.apache.marmotta.ldclient.api.ldclient.LDClientService;
+import org.apache.marmotta.ldclient.api.provider.DataProvider;
+import org.apache.marmotta.ldclient.exception.DataRetrievalException;
+import org.apache.marmotta.ldclient.model.ClientResponse;
+
+public class DummyProvider implements DataProvider {
+
+	/** @return the fixed provider name "Dummy" */
+	@Override
+	public String getName() {
+		return "Dummy";
+	}
+
+	/** @return the single mime type "application/dummy" */
+	@Override
+	public String[] listMimeTypes() {
+		return new String[] {"application/dummy"};
+	}
+
+	/**
+	 * Issues a GET request for the resource and reads nothing but the reason phrase of the status line.
+	 * No data is produced: the method returns null when the request succeeds and wraps every
+	 * failure in a DataRetrievalException.
+	 */
+	@Override
+	public ClientResponse retrieveResource(String resource, LDClientService client, Endpoint endpoint)
+			throws DataRetrievalException {
+		final ResponseHandler<String> reasonPhraseHandler = new ResponseHandler<String>() {
+			@Override
+			public String handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
+				return response.getStatusLine().getReasonPhrase();
+			}
+		};
+		try {
+			client.getClient().execute(new HttpGet(resource), reasonPhraseHandler);
+			return null;
+		} catch (Exception e) {
+			throw new DataRetrievalException(e);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/TestLDClientTest.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/TestLDClientTest.java b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/TestLDClientTest.java
new file mode 100644
index 0000000..63d363f
--- /dev/null
+++ b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/TestLDClientTest.java
@@ -0,0 +1,44 @@
+package org.apache.marmotta.ldclient.test;
+
+import org.apache.marmotta.ldclient.exception.DataRetrievalException;
+import org.apache.marmotta.ldclient.services.ldclient.LDClient;
+import org.apache.marmotta.ldclient.test.helper.TestLDClient;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestLDClientTest {
+
+    private TestLDClient client;
+
+    @Before
+    public void setUp() {
+        client = new TestLDClient(new LDClient());
+    }
+
+    @After
+    public void cleanUp() {
+        client.shutdown();
+    }
+
+    // NOTE(review): despite its name this expects the same exception as testMissingProvider -- confirm the intent
+    @Test(expected = UnsupportedOperationException.class)
+    public void testConnectionRefused() throws Exception {
+        client.retrieveResource("http://no.host.for/this/url");
+        Assert.fail();
+    }
+
+    // the address is covered by the URL pattern of the DummyEndpoint; the invalid port must end in a retrieval error
+    @Test(expected = DataRetrievalException.class)
+    public void testLocalhostInvalidPort() throws Exception {
+        client.retrieveResource("http://127.1.2.3:-1/");
+        Assert.fail();
+    }
+
+    @Test(expected = UnsupportedOperationException.class)
+    public void testMissingProvider() throws Exception {
+        client.retrieveResource("ftp://no.provider.for/this/url");
+        Assert.fail();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/helper/TestLDClient.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/helper/TestLDClient.java b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/helper/TestLDClient.java
new file mode 100644
index 0000000..d032e73
--- /dev/null
+++ b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/helper/TestLDClient.java
@@ -0,0 +1,209 @@
+package org.apache.marmotta.ldclient.test.helper;
+
+import java.io.IOException;
+import java.net.ConnectException;
+import java.net.SocketTimeoutException;
+import java.net.UnknownHostException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import org.apache.http.client.HttpClient;
+import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
+import org.apache.marmotta.ldclient.api.ldclient.LDClientService;
+import org.apache.marmotta.ldclient.api.provider.DataProvider;
+import org.apache.marmotta.ldclient.exception.DataRetrievalException;
+import org.apache.marmotta.ldclient.model.ClientConfiguration;
+import org.apache.marmotta.ldclient.model.ClientResponse;
+import org.junit.Assume;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This is a simple wrapper to use in UnitTests, which handles typical
+ * Exceptions when contacting remote resources.
+ * 
+ * All methods except {@link #retrieveResource(String)} are relayed to the
+ * delegate. {@link #retrieveResource(String)} checks for common retrieval
+ * errors such as "IOException: Connection refused" and deactivates any ongoing
+ * Unit-Test using {@link Assume}.
+ * 
+ */
+public class TestLDClient implements LDClientService {
+
+	private static final Logger log = LoggerFactory.getLogger(TestLDClient.class);
+	
+	/**
+	 * The default checks were shamelessly taken from Apache Stanbol.
+	 * @see <a href="http://svn.apache.org/repos/asf/stanbol/trunk/enhancer/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/RemoteServiceHelper.java">http://svn.apache.org/repos/asf/stanbol/trunk/enhancer/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/RemoteServiceHelper.java</a>
+	 */
+	public static final List<Check> DEFAULT_CHECKS;
+	static {
+		LinkedList<Check> dc = new LinkedList<Check>();
+		// typical failures when contacting remote resources (unknown host, timeout, refused connection, HTTP 50x)
+		dc.add(new Check(UnknownHostException.class));
+		dc.add(new Check(SocketTimeoutException.class));
+		dc.add(new Check(IOException.class, "Connection refused"));
+		dc.add(new Check(IOException.class, "Server returned HTTP response code: 50"));
+		dc.add(new Check(ConnectException.class, "unreachable"));
+		
+		DEFAULT_CHECKS = Collections.unmodifiableList(dc);
+	}
+	
+	private final LDClientService delegate;
+	private final List<Check> extraCheck;
+	private boolean defaultChecks = true;
+
+	public TestLDClient(LDClientService delegate) {
+		this(delegate, new LinkedList<Check>());
+	}
+
+	public TestLDClient(LDClientService delegate, List<Check> extraChecks) {
+		this(delegate, extraChecks, true);
+	}
+
+	/** Wrap the delegate; the list of extra checks is used as is (not copied) and extended by {@link #addCheck}. */
+	public TestLDClient(LDClientService delegate, List<Check> extraChecks, boolean defaultChecks) {
+		this.delegate = delegate;
+		this.extraCheck = extraChecks;
+		this.defaultChecks = defaultChecks;
+	}
+	
+    @Override
+    public boolean ping(String resource) {
+        return delegate.ping(resource);
+    }
+
+    @Override
+	public ClientResponse retrieveResource(String resource)
+			throws DataRetrievalException {
+		try {
+			return delegate.retrieveResource(resource);
+		} catch (final DataRetrievalException e) {
+			if (defaultChecks) {
+				for (Check exCheck : DEFAULT_CHECKS) {
+					exCheck.matches(e); // raises an assumption failure if the check applies
+				}
+			}
+			for (Check exCheck : extraCheck) {
+				exCheck.matches(e);
+			}
+			throw e; // no check applied: report the original failure
+		}
+	}
+	
+    @Override
+	public HttpClient getClient() {
+		return delegate.getClient();
+	}
+
+    @Override
+	public ClientConfiguration getClientConfiguration() {
+		return delegate.getClientConfiguration();
+	}
+
+    @Override
+	public Endpoint getEndpoint(String resource) {
+		return delegate.getEndpoint(resource);
+	}
+
+    @Override
+	public boolean hasEndpoint(String urlPattern) {
+		return delegate.hasEndpoint(urlPattern);
+	}
+
+    @Override
+	public void shutdown() {
+		delegate.shutdown();
+	}
+
+    /**
+     * Return a collection of all available data providers (i.e. registered through the service loader).
+     *
+     * @return the data providers reported by the delegate
+     */
+    @Override
+    public Set<DataProvider> getDataProviders() {
+        return delegate.getDataProviders();
+    }
+
+    public void addCheck(Check check) {
+		extraCheck.add(check);
+	}
+	
+	public void addChecks(Collection<Check> checks) {
+		extraCheck.addAll(checks);
+	}
+
+	public boolean isDefaultChecks() {
+		return defaultChecks;
+	}
+
+	public void setDefaultChecks(boolean defaultChecks) {
+		this.defaultChecks = defaultChecks;
+	}
+
+	/** A condition on an exception (type plus optional message pattern) under which the running test is ignored. */
+	public static class Check {
+		private final Class<? extends Throwable> throwable;
+		private final Pattern messagePattern;
+		private boolean checkStack = true;
+		private String infoMessage;
+
+		public Check(Class<? extends Throwable> throwable) {
+			this(throwable, (Pattern) null);
+		}
+		
+		public Check(Class<? extends Throwable> throwable, String message) {
+			this(throwable, Pattern.compile(Pattern.quote(message), Pattern.CASE_INSENSITIVE));
+			this.infoMessage = String.format("Ignoring because of %s(\"%s\")", throwable.getSimpleName(), message);
+		}
+		
+		public Check(Class<? extends Throwable> throwable, Pattern mPattern) {
+			this.throwable = throwable;
+			this.messagePattern = mPattern;
+			this.infoMessage = String.format("Ignoring because of %s", throwable.getSimpleName());
+		}
+		
+		public Check setCheckStack(boolean checkStack) {
+			this.checkStack = checkStack;
+			return this;
+		}
+
+		/**
+		 * Check if the provided parameter or (unless switched off via {@link #setCheckStack}) one of its causes matches this check.
+		 * If so, the current JUnit test is ignored by raising an {@link Assume} assumption failure; otherwise the method just returns.
+		 * @param t the {@link Throwable} to check.
+		 */
+		public <T extends Throwable> void matches(T t) throws T {
+			matches(t, t);
+		}
+		
+		private <T extends Throwable> void matches(T t, Throwable toCheck) throws T {
+			if (toCheck == null) return;
+			
+			if (throwable.isAssignableFrom(toCheck.getClass())
+					&& (messagePattern == null 
+						|| (toCheck.getMessage() != null && messagePattern.matcher(toCheck.getMessage()).find()))) {
+				log.info("Ignoring test because '{}' ({})", getMessage(), t.getMessage());
+				Assume.assumeNoException(getMessage(), t);
+				throw t;
+			}
+			
+			if (checkStack)
+				matches(t, toCheck.getCause());
+		}
+		
+		public String getMessage() {
+			return infoMessage;
+		}
+		
+		public Check setMessage(String infoMessage) {
+			this.infoMessage = infoMessage;
+			return this;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.endpoint.Endpoint
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.endpoint.Endpoint b/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.endpoint.Endpoint
new file mode 100644
index 0000000..cda05f9
--- /dev/null
+++ b/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.endpoint.Endpoint
@@ -0,0 +1 @@
+org.apache.marmotta.ldclient.dummy.DummyEndpoint
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider b/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider
new file mode 100644
index 0000000..2b6fcfc
--- /dev/null
+++ b/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider
@@ -0,0 +1 @@
+org.apache.marmotta.ldclient.dummy.DummyProvider
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/.classpath
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/.classpath b/ldclient/ldclient-provider-html/.classpath
new file mode 100644
index 0000000..1b28ee5
--- /dev/null
+++ b/ldclient/ldclient-provider-html/.classpath
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="src" output="target/classes" path="src/main/java"/>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
+	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
+		<attributes>
+			<attribute name="org.eclipse.jst.component.nondependency" value=""/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="output" path="target/classes"/>
+</classpath>

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/.project
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/.project b/ldclient/ldclient-provider-html/.project
new file mode 100644
index 0000000..b7daee3
--- /dev/null
+++ b/ldclient/ldclient-provider-html/.project
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>ldclient-provider-html</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.wst.common.project.facet.core.builder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.m2e.core.maven2Builder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.wst.validation.validationbuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jem.workbench.JavaEMFNature</nature>
+		<nature>org.eclipse.wst.common.modulecore.ModuleCoreNature</nature>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+		<nature>org.eclipse.m2e.core.maven2Nature</nature>
+		<nature>org.eclipse.wst.common.project.facet.core.nature</nature>
+	</natures>
+</projectDescription>

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/.settings/org.eclipse.core.resources.prefs
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/.settings/org.eclipse.core.resources.prefs b/ldclient/ldclient-provider-html/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 0000000..2b76340
--- /dev/null
+++ b/ldclient/ldclient-provider-html/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,2 @@
+eclipse.preferences.version=1
+encoding//src/main/java=UTF-8

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/.settings/org.eclipse.jdt.core.prefs
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/.settings/org.eclipse.jdt.core.prefs b/ldclient/ldclient-provider-html/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..69c31cd
--- /dev/null
+++ b/ldclient/ldclient-provider-html/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,8 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
+org.eclipse.jdt.core.compiler.compliance=1.6
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.6

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/.settings/org.eclipse.m2e.core.prefs
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/.settings/org.eclipse.m2e.core.prefs b/ldclient/ldclient-provider-html/.settings/org.eclipse.m2e.core.prefs
new file mode 100644
index 0000000..f897a7f
--- /dev/null
+++ b/ldclient/ldclient-provider-html/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.component
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.component b/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.component
new file mode 100644
index 0000000..7e180a5
--- /dev/null
+++ b/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.component
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project-modules id="moduleCoreId" project-version="1.5.0">
+    <wb-module deploy-name="ldclient-provider-html">
+        <wb-resource deploy-path="/" source-path="/src/main/java"/>
+    </wb-module>
+</project-modules>

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.project.facet.core.xml
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.project.facet.core.xml b/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.project.facet.core.xml
new file mode 100644
index 0000000..c78d932
--- /dev/null
+++ b/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.project.facet.core.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<faceted-project>
+  <installed facet="java" version="1.6"/>
+  <installed facet="jst.utility" version="1.0"/>
+</faceted-project>

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/pom.xml
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/pom.xml b/ldclient/ldclient-provider-html/pom.xml
new file mode 100644
index 0000000..f6415c8
--- /dev/null
+++ b/ldclient/ldclient-provider-html/pom.xml
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Copyright (c) 2013 Salzburg Research.
+  ~  
+  ~  Licensed under the Apache License, Version 2.0 (the "License");
+  ~  you may not use this file except in compliance with the License.
+  ~  You may obtain a copy of the License at
+  ~  
+  ~      http://www.apache.org/licenses/LICENSE-2.0
+  ~  
+  ~  Unless required by applicable law or agreed to in writing, software
+  ~  distributed under the License is distributed on an "AS IS" BASIS,
+  ~  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~  See the License for the specific language governing permissions and
+  ~  limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>at.newmedialab.lmf</groupId>
+        <artifactId>ldclient-parent</artifactId>
+        <version>3.0.0-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>ldclient-provider-html</artifactId>
+    <name>LDClient Provider: HTML Resource Access</name>
+
+    <description>
+        Provides basic support for accessing HTML resources as Linked Data. This package only offers abstract classes
+        that need to be subclassed for concrete cases.
+    </description>
+
+    <dependencies>
+        <dependency>
+            <groupId>at.newmedialab.lmf</groupId>
+            <artifactId>ldclient-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>at.newmedialab.lmf</groupId>
+            <artifactId>ldclient-core</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>at.newmedialab.sesame</groupId>
+            <artifactId>sesame-commons</artifactId>
+        </dependency>
+
+    </dependencies>
+
+</project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/AbstractHTMLDataProvider.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/AbstractHTMLDataProvider.java b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/AbstractHTMLDataProvider.java
new file mode 100644
index 0000000..0d63dc2
--- /dev/null
+++ b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/AbstractHTMLDataProvider.java
@@ -0,0 +1,140 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.provider.html;
+
+import at.newmedialab.sesame.commons.model.Namespaces;
+import org.apache.marmotta.ldclient.api.provider.DataProvider;
+import org.apache.marmotta.ldclient.exception.DataRetrievalException;
+import org.apache.marmotta.ldclient.provider.html.mapping.JSoupMapper;
+import org.apache.marmotta.ldclient.services.provider.AbstractHttpProvider;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.openrdf.model.Resource;
+import org.openrdf.model.Statement;
+import org.openrdf.model.URI;
+import org.openrdf.model.Value;
+import org.openrdf.model.ValueFactory;
+import org.openrdf.repository.Repository;
+import org.openrdf.repository.RepositoryConnection;
+import org.openrdf.repository.RepositoryException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Generic implementation of an HTML data provider capable of mapping elements selected from HTML documents to
+ * RDF metadata properties. The response is parsed with jsoup, and every {@link JSoupMapper} returned by
+ * {@link #getMappings(String, String)} selects elements from the parsed document and maps them to RDF values.
+ * <p/>
+ * Author: Sebastian Schaffert
+ */
+public abstract class AbstractHTMLDataProvider extends AbstractHttpProvider implements DataProvider {
+
+    /**
+     * Return a list of URIs that should be added as types for each processed resource.
+     * 
+     * @return
+     * @param resource
+     */
+    protected abstract List<String> getTypes(URI resource);
+
+    /**
+     * Try to find further URLs in the document that need to be requested to complete the resource
+     * data.
+     * Used e.g. to parse the result of paging in HTML pages. The default implementation returns an
+     * empty list.
+     * 
+     *
+     * @param resource
+     * @param document
+     * @param requestUrl
+     * @return
+     */
+    protected List<String> findAdditionalRequestUrls(String resource, Document document, String requestUrl) {
+        return Collections.emptyList();
+    }
+
+
+    /**
+     * Parse the HTTP response entity returned by the web service call and return its contents as a Sesame RDF
+     * repository. The content type returned by the web service is passed as argument to help the implementation
+     * decide how to parse the data.
+     *
+     *
+     * @param resource    the subject of the data retrieval
+     * @param in          input stream as returned by the remote webservice
+     * @param contentType content type as returned in the HTTP headers of the remote webservice
+     * @return an RDF repository containing an RDF representation of the dataset located at the remote resource.
+     * @throws java.io.IOException in case an error occurs while reading the input stream
+     */
+    @Override
+    public List<String> parseResponse(String resource, String requestUrl, Repository triples, InputStream in, String contentType) throws DataRetrievalException {
+        String charset = null;
+        Pattern pattern = Pattern.compile("charset=([a-zA-Z0-9-_]+)");
+        Matcher matcher = pattern.matcher(contentType);
+        if(matcher.find()) {
+            charset = matcher.group(1);
+        }
+
+        try {
+            Document htmlDoc = Jsoup.parse(in,charset,requestUrl);
+
+            RepositoryConnection con = triples.getConnection();
+            ValueFactory vf = con.getValueFactory();
+            URI subject = vf.createURI(resource);
+
+            for (Map.Entry<String, JSoupMapper> mapping : getMappings(resource, requestUrl).entrySet()) {
+                URI predicate = vf.createURI(mapping.getKey());
+
+                final Elements values = mapping.getValue().select(htmlDoc);
+                for(Element value : values) {
+                    List<Value> objects = mapping.getValue().map(resource, value, vf);
+                    for(Value object : objects) {
+                        Statement stmt = vf.createStatement(subject, predicate, object);
+                        con.add(stmt);
+                    }
+                }
+            }
+
+            org.openrdf.model.URI ptype = vf.createURI(Namespaces.NS_RDF + "type");
+
+            for(String typeUri : getTypes(subject)) {
+                Resource type_resource = vf.createURI(typeUri);
+                con.add(vf.createStatement(subject, ptype, type_resource));
+            }
+
+            con.commit();
+            con.close();
+
+            return findAdditionalRequestUrls(resource, htmlDoc, requestUrl);
+
+        } catch (RepositoryException e) {
+            throw new DataRetrievalException("repository error while parsing XML response",e);
+        } catch (IOException e) {
+            throw new DataRetrievalException("I/O error while parsing HTML response",e);
+        }
+
+    }
+
+    protected abstract Map<String, JSoupMapper> getMappings(String resource, String requestUrl);
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssDateLiteralMapper.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssDateLiteralMapper.java b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssDateLiteralMapper.java
new file mode 100644
index 0000000..6d8b129
--- /dev/null
+++ b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssDateLiteralMapper.java
@@ -0,0 +1,96 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.provider.html.mapping;
+
+import at.newmedialab.sesame.commons.model.Namespaces;
+import at.newmedialab.sesame.commons.util.DateUtils;
+import org.jsoup.nodes.Element;
+import org.openrdf.model.Value;
+import org.openrdf.model.ValueFactory;
+
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Collections;
+import java.util.Date;
+import java.util.List;
+
+public class CssDateLiteralMapper extends CssTextLiteralMapper {
+
+    /** Format used to parse the element text; while null, parsing is left to DateUtils.parseDate. */
+    protected DateFormat format = null;
+
+    /** Creates a mapper for the given CSS selector that produces "dateTime" literals. */
+    public CssDateLiteralMapper(String cssSelector) {
+        this(cssSelector, "dateTime");
+    }
+
+    /** Creates a mapper for the given CSS selector and XSD datatype (local name). */
+    public CssDateLiteralMapper(String cssSelector, String datatype) {
+        super(cssSelector, datatype);
+    }
+
+    /** Creates a mapper for the given selector that produces "dateTime" literals. */
+    public CssDateLiteralMapper(Selector selector) {
+        this(selector, "dateTime");
+    }
+
+    /** Creates a mapper for the given selector and XSD datatype (local name). */
+    public CssDateLiteralMapper(Selector selector, String datatype) {
+        super(selector, datatype);
+    }
+
+    /** Sets the {@link SimpleDateFormat} pattern used to parse the element text; returns this mapper. */
+    public CssDateLiteralMapper setParseFormat(String format) {
+        this.format = new SimpleDateFormat(format);
+        return this;
+    }
+
+    /** Sets the format used to parse the element text; returns this mapper. */
+    public CssDateLiteralMapper setParseFormat(DateFormat format) {
+        this.format = format;
+        return this;
+    }
+
+    /** Maps the trimmed element text to one date literal, or to an empty list if parseDate returns null. */
+    @Override
+    public List<Value> map(String resourceUri, Element elem, ValueFactory factory) {
+        final String text = elem.text().trim();
+        final Date parsed = parseDate(text);
+        if (parsed == null) return Collections.emptyList();
+
+        // datatypes other than dateTime, date and time keep the trimmed text as lexical value
+        String lexical = text;
+        if ("dateTime".equals(datatype)) {
+            lexical = DateUtils.ISO8601FORMAT.format(parsed);
+        } else if ("date".equals(datatype)) {
+            lexical = DateUtils.ISO8601FORMAT_DATE.format(parsed);
+        } else if ("time".equals(datatype)) {
+            lexical = DateUtils.ISO8601FORMAT_TIME.format(parsed);
+        }
+        return Collections.singletonList((Value) factory.createLiteral(lexical, factory.createURI(Namespaces.NS_XSD + datatype)));
+    }
+
+    /** Parses with the configured format (null on ParseException) or, if none is set, with DateUtils.parseDate. */
+    protected Date parseDate(final String value) {
+        if (format == null) return DateUtils.parseDate(value);
+        try {
+            return format.parse(value);
+        } catch (ParseException e) {
+            return null;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssLiteralAttrMapper.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssLiteralAttrMapper.java b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssLiteralAttrMapper.java
new file mode 100644
index 0000000..06f9987
--- /dev/null
+++ b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssLiteralAttrMapper.java
@@ -0,0 +1,89 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.provider.html.mapping;
+
+import at.newmedialab.sesame.commons.model.Namespaces;
+import org.apache.commons.lang.StringUtils;
+import org.jsoup.nodes.Element;
+import org.openrdf.model.Value;
+import org.openrdf.model.ValueFactory;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+
+public class CssLiteralAttrMapper extends CssSelectorMapper {
+
+    protected final String attr;
+    // the public constructors set at most one of datatype (XSD local name) and language
+    protected final String datatype;
+    protected final Locale language;
+
+    private CssLiteralAttrMapper(String cssSelector, String attr, Locale lang, String datatype) {
+        super(cssSelector);
+        this.attr = attr;
+        this.language = lang;
+        this.datatype = datatype;
+    }
+
+    private CssLiteralAttrMapper(Selector selector, String attr, Locale lang, String datatype) {
+        super(selector);
+        this.attr = attr;
+        this.language = lang;
+        this.datatype = datatype;
+    }
+
+    public CssLiteralAttrMapper(String cssSelector, String attr, Locale lang) {
+        this(cssSelector, attr, lang, null);
+    }
+
+    public CssLiteralAttrMapper(String cssSelector, String attr, String datatype) {
+        this(cssSelector, attr, null, datatype);
+    }
+
+    public CssLiteralAttrMapper(String cssSelector, String attr) {
+        this(cssSelector, attr, null, null);
+    }
+
+    public CssLiteralAttrMapper(Selector selector, String attr, Locale lang) {
+        this(selector, attr, lang, null);
+    }
+
+    public CssLiteralAttrMapper(Selector selector, String attr, String datatype) {
+        this(selector, attr, null, datatype);
+    }
+
+    public CssLiteralAttrMapper(Selector selector, String attr) {
+        this(selector, attr, null, null);
+    }
+
+    protected String cleanValue(String value) {
+        return value.trim();
+    }
+
+    /** Maps the cleaned attribute value to one literal (language-tagged, typed or plain); blank values yield an empty list. */
+    @Override
+    public List<Value> map(String resourceUri, Element elem, ValueFactory factory) {
+        final String value = cleanValue(elem.attr(attr));
+        if (StringUtils.isBlank(value)) return Collections.emptyList();
+        // Locale.toString() joins its parts with '_', whereas RDF language tags (BCP 47) use '-'
+        if (language != null)
+            return Collections.singletonList((Value) factory.createLiteral(value, language.toString().replace('_', '-')));
+        if (datatype != null)
+            return Collections.singletonList((Value) factory.createLiteral(value, factory.createURI(Namespaces.NS_XSD + datatype)));
+        return Collections.singletonList((Value) factory.createLiteral(value));
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssSelectorMapper.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssSelectorMapper.java b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssSelectorMapper.java
new file mode 100644
index 0000000..1c1ce5e
--- /dev/null
+++ b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssSelectorMapper.java
@@ -0,0 +1,48 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.provider.html.mapping;
+
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+
+public abstract class CssSelectorMapper implements JSoupMapper {
+    /** Strategy that picks, from the node it is given, the elements to be mapped. */
+    public interface Selector {
+        Elements select(Element node);
+    }
+
+    protected final Selector selector;
+
+    /** Creates a mapper whose selector evaluates the given CSS query on the node passed to it. */
+    public CssSelectorMapper(final String cssSelector) {
+        this(new Selector() {
+            @Override
+            public Elements select(Element root) {
+                return root.select(cssSelector);
+            }
+        });
+    }
+
+    public CssSelectorMapper(Selector selector) {
+        this.selector = selector;
+    }
+
+    @Override
+    public Elements select(Element htmlDoc) {
+        return selector.select(htmlDoc);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssTextLiteralMapper.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssTextLiteralMapper.java b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssTextLiteralMapper.java
new file mode 100644
index 0000000..fa52074
--- /dev/null
+++ b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssTextLiteralMapper.java
@@ -0,0 +1,86 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.provider.html.mapping;
+
+import at.newmedialab.sesame.commons.model.Namespaces;
+import org.apache.commons.lang.StringUtils;
+import org.jsoup.nodes.Element;
+import org.openrdf.model.Value;
+import org.openrdf.model.ValueFactory;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+
+public class CssTextLiteralMapper extends CssSelectorMapper {
+
+    // the public constructors set at most one of datatype (XSD local name) and language
+    protected final String datatype;
+    protected final Locale language;
+
+    private CssTextLiteralMapper(String cssSelector, Locale lang, String datatype) {
+        super(cssSelector);
+        this.language = lang;
+        this.datatype = datatype;
+    }
+
+    private CssTextLiteralMapper(Selector selector, Locale lang, String datatype) {
+        super(selector);
+        this.language = lang;
+        this.datatype = datatype;
+    }
+
+    public CssTextLiteralMapper(String cssSelector) {
+        this(cssSelector, null, null);
+    }
+
+    public CssTextLiteralMapper(String cssSelector, Locale lang) {
+        this(cssSelector, lang, null);
+    }
+
+    public CssTextLiteralMapper(String cssSelector, String datatype) {
+        this(cssSelector, null, datatype);
+    }
+
+    public CssTextLiteralMapper(Selector selector) {
+        this(selector, null, null);
+    }
+
+    public CssTextLiteralMapper(Selector selector, Locale lang) {
+        this(selector, lang, null);
+    }
+
+    public CssTextLiteralMapper(Selector selector, String datatype) {
+        this(selector, null, datatype);
+    }
+
+    protected String cleanValue(String value) {
+        return value.trim();
+    }
+
+    /** Maps the cleaned element text to one literal (language-tagged, typed or plain); blank text yields an empty list. */
+    @Override
+    public List<Value> map(String resourceUri, Element elem, ValueFactory factory) {
+        final String value = cleanValue(elem.text());
+        if (StringUtils.isBlank(value)) return Collections.emptyList();
+        // Locale.toString() joins its parts with '_', whereas RDF language tags (BCP 47) use '-'
+        if (language != null)
+            return Collections.singletonList((Value) factory.createLiteral(value, language.toString().replace('_', '-')));
+        if (datatype != null)
+            return Collections.singletonList((Value) factory.createLiteral(value, factory.createURI(Namespaces.NS_XSD + datatype)));
+        return Collections.singletonList((Value) factory.createLiteral(value));
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssUriAttrBlacklistQueryParamsMapper.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssUriAttrBlacklistQueryParamsMapper.java b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssUriAttrBlacklistQueryParamsMapper.java
new file mode 100644
index 0000000..a79ae4a
--- /dev/null
+++ b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssUriAttrBlacklistQueryParamsMapper.java
@@ -0,0 +1,58 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.provider.html.mapping;
+
+import org.apache.http.NameValuePair;
+import org.apache.http.client.utils.URIBuilder;
+import org.apache.http.client.utils.URLEncodedUtils;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Arrays;
+import java.util.List;
+
+public class CssUriAttrBlacklistQueryParamsMapper extends CssUriAttrMapper {
+
+    protected List<String> queryParams;
+
+    public CssUriAttrBlacklistQueryParamsMapper(String cssSelector, String attr, String... queryParamBlacklist) {
+        super(cssSelector, attr);
+        queryParams = Arrays.asList(queryParamBlacklist);
+    }
+
+    public CssUriAttrBlacklistQueryParamsMapper(Selector selector, String attr, String... queryParamBlacklist) {
+        super(selector, attr);
+        queryParams = Arrays.asList(queryParamBlacklist);
+    }
+
+    /** Removes the query parameters named in queryParams, then delegates to the superclass; invalid URIs are delegated unchanged. */
+    @Override
+    protected String rewriteUrl(String url) {
+        try {
+            URI u = new URI(url);
+            URIBuilder builder = new URIBuilder(u).removeQuery();
+            for (NameValuePair p : URLEncodedUtils.parse(u, "UTF-8")) {
+                if (!queryParams.contains(p.getName())) {
+                    // addParameter (not setParameter) so that repeated parameters keep all their values
+                    builder.addParameter(p.getName(), p.getValue());
+                }
+            }
+            return super.rewriteUrl(builder.build().toString());
+        } catch (URISyntaxException e) {
+            return super.rewriteUrl(url);
+        }
+    }
+}