You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@marmotta.apache.org by wi...@apache.org on 2013/02/19 13:52:00 UTC
[32/52] [partial] code contribution,
initial import of relevant modules of LMF-3.0.0-SNAPSHOT based on
revision 4bf944319368 of the default branch at https://code.google.com/p/lmf/
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/.settings/org.eclipse.jdt.core.prefs
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/.settings/org.eclipse.jdt.core.prefs b/ldclient/ldclient-core/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..69c31cd
--- /dev/null
+++ b/ldclient/ldclient-core/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,8 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
+org.eclipse.jdt.core.compiler.compliance=1.6
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.6
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/.settings/org.eclipse.m2e.core.prefs
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/.settings/org.eclipse.m2e.core.prefs b/ldclient/ldclient-core/.settings/org.eclipse.m2e.core.prefs
new file mode 100644
index 0000000..f897a7f
--- /dev/null
+++ b/ldclient/ldclient-core/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/.settings/org.eclipse.wst.common.component
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/.settings/org.eclipse.wst.common.component b/ldclient/ldclient-core/.settings/org.eclipse.wst.common.component
new file mode 100644
index 0000000..adae079
--- /dev/null
+++ b/ldclient/ldclient-core/.settings/org.eclipse.wst.common.component
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project-modules id="moduleCoreId" project-version="1.5.0">
+ <wb-module deploy-name="ldclient-core">
+ <wb-resource deploy-path="/" source-path="/src/main/java"/>
+ <wb-resource deploy-path="/" source-path="/src/main/resources"/>
+ </wb-module>
+</project-modules>
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/.settings/org.eclipse.wst.common.project.facet.core.xml
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/.settings/org.eclipse.wst.common.project.facet.core.xml b/ldclient/ldclient-core/.settings/org.eclipse.wst.common.project.facet.core.xml
new file mode 100644
index 0000000..c78d932
--- /dev/null
+++ b/ldclient/ldclient-core/.settings/org.eclipse.wst.common.project.facet.core.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<faceted-project>
+ <installed facet="java" version="1.6"/>
+ <installed facet="jst.utility" version="1.0"/>
+</faceted-project>
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/pom.xml
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/pom.xml b/ldclient/ldclient-core/pom.xml
new file mode 100644
index 0000000..f6a6133
--- /dev/null
+++ b/ldclient/ldclient-core/pom.xml
@@ -0,0 +1,124 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Copyright (c) 2013 Salzburg Research.
+ ~
+ ~ Licensed under the Apache License, Version 2.0 (the "License");
+ ~ you may not use this file except in compliance with the License.
+ ~ You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>at.newmedialab.lmf</groupId>
+ <artifactId>ldclient-parent</artifactId>
+ <version>3.0.0-SNAPSHOT</version>
+ <relativePath>../</relativePath>
+ </parent>
+
+ <artifactId>ldclient-core</artifactId>
+ <name>LDClient: Core Library</name>
+
+ <description>
+ Linked Data Client Core Library, provides the basic resource access functionality, including
+ HTTP connection handling and content negotiation support.
+ </description>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>2.4</version>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ <configuration>
+ <includes>
+ <include>**/test/helper/**</include>
+ </includes>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>at.newmedialab.lmf</groupId>
+ <artifactId>ldclient-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.openrdf.sesame</groupId>
+ <artifactId>sesame-model</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.openrdf.sesame</groupId>
+ <artifactId>sesame-repository-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>at.newmedialab.sesame</groupId>
+ <artifactId>sesame-commons</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </dependency>
+
+ <!-- logging -->
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-ext</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jcl-over-slf4j</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>log4j-over-slf4j</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jul-to-slf4j</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpclient</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpclient-cache</artifactId>
+ </dependency>
+
+
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-simple</artifactId>
+ <version>1.7.2</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/ldclient/LDClient.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/ldclient/LDClient.java b/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/ldclient/LDClient.java
new file mode 100644
index 0000000..64d7426
--- /dev/null
+++ b/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/ldclient/LDClient.java
@@ -0,0 +1,413 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.services.ldclient;
+
+import org.apache.commons.lang.NotImplementedException;
+import org.apache.http.Header;
+import org.apache.http.HttpRequest;
+import org.apache.http.HttpResponse;
+import org.apache.http.HttpStatus;
+import org.apache.http.ProtocolException;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.HttpRequestRetryHandler;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpHead;
+import org.apache.http.client.params.ClientPNames;
+import org.apache.http.conn.ClientConnectionManager;
+import org.apache.http.conn.scheme.PlainSocketFactory;
+import org.apache.http.conn.scheme.Scheme;
+import org.apache.http.conn.scheme.SchemeRegistry;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.impl.client.DefaultRedirectStrategy;
+import org.apache.http.impl.conn.PoolingClientConnectionManager;
+import org.apache.http.params.BasicHttpParams;
+import org.apache.http.params.CoreConnectionPNames;
+import org.apache.http.params.CoreProtocolPNames;
+import org.apache.http.params.HttpParams;
+import org.apache.http.protocol.HttpContext;
+import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
+import org.apache.marmotta.ldclient.api.ldclient.LDClientService;
+import org.apache.marmotta.ldclient.api.provider.DataProvider;
+import org.apache.marmotta.ldclient.exception.DataRetrievalException;
+import org.apache.marmotta.ldclient.model.ClientConfiguration;
+import org.apache.marmotta.ldclient.model.ClientResponse;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ServiceLoader;
+import java.util.Set;
+import java.util.concurrent.Semaphore;
+import java.util.concurrent.TimeUnit;
+
+import javax.net.ssl.SSLEngineResult.Status;
+
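+/**
+ * Default implementation of the {@link LDClientService}: looks up the endpoint and data provider
+ * responsible for a resource and delegates the retrieval to that provider, sharing one HTTP client.
+ * <p/>
+ * User: sschaffe
+ */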
+public final class LDClient implements LDClientService {
+
+ private static Logger log = LoggerFactory.getLogger(LDClient.class);
+
+ /**
+ * A Java service loader loading all implementations of data providers registered on the classpath.
+ */
+ private static ServiceLoader<DataProvider> providers = ServiceLoader.load(DataProvider.class);
+
+ /**
+ * A Java service loader loading all auto-registered endpoint configurations on the classpath.
+ */
+ private static ServiceLoader<Endpoint> defaultEndpoints = ServiceLoader.load(Endpoint.class);
+
+ private HttpClient client;
+
+ private IdleConnectionMonitorThread idleConnectionMonitorThread;
+
+ // limit maximum parallel retrievals of resources
+ private Semaphore retrievalSemaphore;
+
+ private ClientConfiguration config;
+
+ private List<Endpoint> endpoints;
+
+ /**
+ * Create a client that uses a freshly constructed default {@link ClientConfiguration}.
+ */
+ public LDClient() {
+ this(new ClientConfiguration());
+ }
+
+ /**
+ * Create a client for the given configuration. Collects the endpoints, sets up the limit on
+ * parallel retrievals and either adopts the HTTP client supplied by the configuration or builds
+ * a pooled default client.
+ *
+ * @param config the configuration to read endpoints, limits, timeouts and the optional HTTP client from
+ */
+ public LDClient(ClientConfiguration config) {
+ log.info("Initialising Linked Data Client Service ...");
+
+ this.config = config;
+
+ // endpoints auto-registered through the service loader first, then the configured ones
+ endpoints = new ArrayList<Endpoint>();
+ for(Endpoint endpoint : defaultEndpoints) {
+ endpoints.add(endpoint);
+ }
+ endpoints.addAll(config.getEndpoints());
+
+ // getEndpoint() returns the first match, so the sort order decides which endpoint wins
+ Collections.sort(endpoints);
+ if(log.isInfoEnabled()) {
+ for(Endpoint endpoint : endpoints) {
+ log.info("- LDClient Endpoint: {}", endpoint.getName());
+ }
+ }
+
+ // one permit per allowed parallel retrieval
+ retrievalSemaphore = new Semaphore(config.getMaxParallelRequests());
+
+ if (config.getHttpClient() != null) {
+ // externally supplied client: its connection pool is not managed (nor shut down) by this class
+ log.debug("Using HttpClient provided in the configuration");
+ this.client = config.getHttpClient();
+ } else {
+ log.debug("Creating default HttpClient based on the configuration");
+
+ HttpParams httpParams = new BasicHttpParams();
+ httpParams.setParameter(CoreProtocolPNames.USER_AGENT, "Apache Marmotta LDClient");
+
+ httpParams.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, config.getSocketTimeout());
+ httpParams.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, config.getConnectionTimeout());
+
+ httpParams.setBooleanParameter(ClientPNames.HANDLE_REDIRECTS,true);
+ httpParams.setIntParameter(ClientPNames.MAX_REDIRECTS,3);
+
+
+ // NOTE(review): only the plain "http" scheme is registered here; no "https" scheme is set up
+ SchemeRegistry schemeRegistry = new SchemeRegistry();
+ schemeRegistry.register(
+ new Scheme("http", 80, PlainSocketFactory.getSocketFactory()));
+
+ // pooled connections: at most 20 in total and 10 per route
+ PoolingClientConnectionManager cm = new PoolingClientConnectionManager(schemeRegistry);
+ cm.setMaxTotal(20);
+ cm.setDefaultMaxPerRoute(10);
+
+ // local variable intentionally shadows the field until it is assigned below
+ DefaultHttpClient client = new DefaultHttpClient(cm,httpParams);
+ client.setRedirectStrategy(new LMFRedirectStrategy());
+ client.setHttpRequestRetryHandler(new LMFHttpRequestRetryHandler());
+ // background daemon thread that periodically evicts expired and idle pooled connections
+ idleConnectionMonitorThread = new IdleConnectionMonitorThread(client.getConnectionManager());
+ idleConnectionMonitorThread.start();
+
+ this.client = client;
+ }
+
+ // log every data provider found by the service loader
+ for(DataProvider provider : providers) {
+ log.info("data provider: {}",provider.getName());
+ }
+ }
+
+ @Override
+ public boolean ping(String resource) {
+ //crappy implementation only for http
+ if (resource.startsWith("http://")) {
+ try {
+ return (200 == client.execute(new HttpHead(resource)).getStatusLine().getStatusCode());
+ } catch (Exception e) {
+ log.error(e.getMessage());
+ return false;
+ }
+ } else {
+ throw new NotImplementedException("protocol not supportted");
+ }
+
+ /*
+ and here what must be the actual implementation
+ try {
+ retrievalSemaphore.acquire();
+ if(!config.isExcludedUri(resource)) {
+
+ Endpoint endpoint = getEndpoint(resource);
+
+ if(endpoint != null) {
+ DataProvider provider = getDataProvider(endpoint);
+ if(provider != null) {
+ return provider.ping(resource, this, endpoint);
+ } else {
+ log.error("no service provider for type {}",endpoint.getType());
+ }
+ } else {
+ // TODO: the fallback should at least be a Linked Data handler, so maybe we should merge the ldclient-provider-rdf?
+ // TODO: determine service provider from connection handshaking / MIME type
+ throw new UnsupportedOperationException("not implemented: determine service provider from connection handshaking / MIME type");
+ }
+ } else {
+ log.error("cannot retrieve a local resource; linked data caching only allowed for remote resources");
+ }
+ } catch (InterruptedException e) {
+ log.warn("retrieval of resource was interruped: {}",resource);
+ } finally {
+ retrievalSemaphore.release();
+ }
+ return false;
+ */
+ }
+
+
+    /**
+     * Release the HTTP resources owned by this client. A client that was supplied through the
+     * configuration is left untouched, since its life cycle belongs to whoever created it.
+     */
+    @Override
+    public void shutdown() {
+        if (config.getHttpClient() != null) {
+            // externally provided client: nothing of ours to clean up
+            return;
+        }
+
+        // we manage our own connection pool
+        if (idleConnectionMonitorThread != null) {
+            idleConnectionMonitorThread.shutdown();
+        }
+        client.getConnectionManager().shutdown();
+    }
+
+
+
+    /**
+     * Retrieve all triples for this resource from the Linked Data Cloud. Retrieval is carried out by the
+     * data provider registered for the type of the endpoint that matches the resource. The number of
+     * retrievals running in parallel is bounded by the retrieval semaphore.
+     *
+     * @param resource the URI resource for which to retrieve the triples
+     * @return the response produced by the data provider, or {@code null} if the resource is excluded by
+     *         the configuration, no data provider exists for the endpoint type, or the calling thread was
+     *         interrupted while waiting for a retrieval slot
+     * @throws DataRetrievalException        if the data provider fails to retrieve the resource
+     * @throws UnsupportedOperationException if no endpoint matches the resource (fallback not implemented)
+     */
+    @Override
+    public ClientResponse retrieveResource(String resource) throws DataRetrievalException {
+        // Acquire outside of the try/finally below: if the acquire is interrupted no permit was taken,
+        // and releasing one anyway would silently raise the limit of parallel requests.
+        try {
+            retrievalSemaphore.acquire();
+        } catch (InterruptedException e) {
+            // restore the interrupt status so that callers can react to it
+            Thread.currentThread().interrupt();
+            log.warn("retrieval of resource was interruped: {}",resource);
+            return null;
+        }
+
+        try {
+            if(config.isExcludedUri(resource)) {
+                log.error("cannot retrieve a local resource; linked data caching only allowed for remote resources");
+                return null;
+            }
+
+            Endpoint endpoint = getEndpoint(resource);
+            if(endpoint == null) {
+                // TODO: the fallback should at least be a Linked Data handler, so maybe we should merge the ldclient-provider-rdf?
+                // TODO: determine service provider from connection handshaking / MIME type
+                throw new UnsupportedOperationException("not implemented: determine service provider from connection handshaking / MIME type");
+            }
+
+            DataProvider provider = getDataProvider(endpoint);
+            if(provider == null) {
+                log.error("no service provider for type {}",endpoint.getType());
+                return null;
+            }
+
+            return provider.retrieveResource(resource, this, endpoint);
+        } finally {
+            retrievalSemaphore.release();
+        }
+    }
+
+ /**
+ * Get access to the Apache HTTP Client managed by the connection handler to execute
+ * a request.
+ *
+ * @return the HTTP client selected by the constructor: the one supplied in the configuration if
+ *         present, otherwise the default client created by this class
+ */
+ @Override
+ public HttpClient getClient() {
+ return client;
+ }
+
+ /**
+ * Get the client configuration used by the connection handler
+ *
+ * @return the configuration instance that was passed to the constructor
+ */
+ @Override
+ public ClientConfiguration getClientConfiguration() {
+ return config;
+ }
+
+
+ /**
+ * Retrieve the endpoint responsible for the resource URI passed as argument. The registered endpoints
+ * are asked in their sorted order whether they handle the resource; the first one that does is returned.
+ * If no matching endpoint exists, returns null.
+ *
+ * @param resource the URI of the resource to find an endpoint for
+ * @return the first endpoint whose {@code handles(resource)} returns true, or null if there is none
+ */
+ @Override
+ public Endpoint getEndpoint(String resource) {
+ for(Endpoint endpoint : endpoints) {
+ if (endpoint.handles(resource)) return endpoint;
+ }
+
+ return null;
+ }
+
+ /**
+ * Test whether an endpoint definition for the given url pattern already exists.
+ *
+ * @param urlPattern the pattern to look for; compared with {@code equals} against the URI pattern
+ *                   of every registered endpoint (endpoints without a URI pattern are skipped)
+ * @return true if at least one registered endpoint has an equal URI pattern, false otherwise
+ */
+ @Override
+ public boolean hasEndpoint(String urlPattern) {
+ for(Endpoint endpoint : endpoints) {
+ if(endpoint.getUriPattern() != null && endpoint.getUriPattern().equals(urlPattern)) return true;
+ }
+ return false;
+ }
+
+ /**
+ * Return a collection of all available data providers (i.e. registered through the service loader).
+ * @return a newly created set holding every provider the service loader yields; changes to the
+ *         returned set do not affect this client
+ */
+ @Override
+ public Set<DataProvider> getDataProviders() {
+ Set<DataProvider> result = new HashSet<DataProvider>();
+ for(DataProvider p : providers) {
+ result.add(p);
+ }
+ return result;
+ }
+
+
+ /**
+ * Find the data provider responsible for an endpoint by comparing the endpoint type with the
+ * provider names, ignoring case.
+ *
+ * @param endpoint the endpoint whose type selects the provider
+ * @return the first provider whose name matches the endpoint type, or null if there is none
+ */
+ private DataProvider getDataProvider(Endpoint endpoint) {
+ for(DataProvider provider : providers) {
+ if(endpoint.getType().equalsIgnoreCase(provider.getName())) return provider;
+ }
+ return null;
+ }
+
+    /**
+     * Redirect strategy of the default HTTP client. "See Other" and "Multiple Choices" responses are
+     * always treated as redirects; "Moved Permanently" and "Temporary Redirect" only for GET and HEAD
+     * requests; "Moved Temporarily" only for GET and HEAD requests that carry a location header.
+     */
+    private static class LMFRedirectStrategy extends DefaultRedirectStrategy {
+        @Override
+        public boolean isRedirected(HttpRequest request, HttpResponse response, HttpContext context) throws ProtocolException {
+            if (response == null) {
+                throw new IllegalArgumentException("HTTP response may not be null");
+            }
+
+            final int status = response.getStatusLine().getStatusCode();
+            final String verb = request.getRequestLine().getMethod();
+            final Header location = response.getFirstHeader("location");
+
+            switch (status) {
+                case HttpStatus.SC_SEE_OTHER:
+                case HttpStatus.SC_MULTIPLE_CHOICES:
+                    return true;
+                case HttpStatus.SC_MOVED_PERMANENTLY:
+                case HttpStatus.SC_TEMPORARY_REDIRECT:
+                    return isGetOrHead(verb);
+                case HttpStatus.SC_MOVED_TEMPORARILY:
+                    return isGetOrHead(verb) && location != null;
+                default:
+                    return false;
+            }
+        }
+
+        /** Tell whether the request method is GET or HEAD, ignoring case. */
+        private static boolean isGetOrHead(String verb) {
+            return verb.equalsIgnoreCase(HttpGet.METHOD_NAME)
+                    || verb.equalsIgnoreCase(HttpHead.METHOD_NAME);
+        }
+    }
+
+ /**
+ * Retry handler of the default HTTP client that never retries a failed request.
+ */
+ private static class LMFHttpRequestRetryHandler implements HttpRequestRetryHandler {
+ /**
+ * Determines if a method should be retried after an IOException
+ * occurs during execution. This implementation ignores all arguments.
+ *
+ * @param exception the exception that occurred
+ * @param executionCount the number of times this method has been
+ * unsuccessfully executed
+ * @param context the context for the request execution
+ * @return always <code>false</code>, i.e. requests are never retried
+ */
+ @Override
+ public boolean retryRequest(IOException exception, int executionCount, HttpContext context) {
+ return false;
+ }
+ }
+
+ /**
+ * Daemon thread that wakes up every five seconds (or when notified) and asks the connection manager
+ * to close expired connections and connections that have been idle for more than 30 seconds.
+ * It runs until {@link #shutdown()} is called or the thread is interrupted.
+ */
+ private static class IdleConnectionMonitorThread extends Thread {
+
+ private final ClientConnectionManager connMgr;
+ // volatile: written by the thread calling shutdown(), read by the monitor thread
+ private volatile boolean shutdown;
+
+ /**
+ * @param connMgr the connection manager whose connections are cleaned up by this thread
+ */
+ public IdleConnectionMonitorThread(ClientConnectionManager connMgr) {
+ super("LD HTTP Client Idle Connection Manager");
+ this.connMgr = connMgr;
+ // daemon thread, so it does not keep the JVM alive
+ setDaemon(true);
+ }
+
+ @Override
+ public void run() {
+ try {
+ while (!shutdown) {
+ synchronized (this) {
+ // sleeps up to 5 seconds; shutdown() wakes it up early through notifyAll()
+ wait(5000);
+ // Close expired connections
+ connMgr.closeExpiredConnections();
+ // Optionally, close connections
+ // that have been idle longer than 30 sec
+ connMgr.closeIdleConnections(30, TimeUnit.SECONDS);
+ }
+ }
+ } catch (InterruptedException ex) {
+ // terminate
+ }
+ }
+
+ /**
+ * Ask the thread to stop: sets the shutdown flag and wakes the thread up if it is waiting.
+ */
+ public void shutdown() {
+ shutdown = true;
+ synchronized (this) {
+ notifyAll();
+ }
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/AbstractHttpProvider.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/AbstractHttpProvider.java b/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/AbstractHttpProvider.java
new file mode 100644
index 0000000..c6bd0f8
--- /dev/null
+++ b/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/AbstractHttpProvider.java
@@ -0,0 +1,298 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.services.provider;
+
+import org.apache.http.Header;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.cookie.DateParseException;
+import org.apache.http.impl.cookie.DateUtils;
+import org.apache.http.util.EntityUtils;
+import org.apache.marmotta.commons.collections.CollectionUtils;
+import org.apache.marmotta.commons.http.ContentType;
+import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
+import org.apache.marmotta.ldclient.api.ldclient.LDClientService;
+import org.apache.marmotta.ldclient.api.provider.DataProvider;
+import org.apache.marmotta.ldclient.exception.DataRetrievalException;
+import org.apache.marmotta.ldclient.model.ClientResponse;
+import org.openrdf.repository.Repository;
+import org.openrdf.repository.RepositoryConnection;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.repository.sail.SailRepository;
+import org.openrdf.sail.memory.MemoryStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Queue;
+import java.util.Set;
+
+import static org.apache.marmotta.commons.http.LMFHttpUtils.parseContentType;
+
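+/**
+ * Base class for data providers that retrieve the data of a resource over HTTP. It implements the
+ * request loop, content negotiation headers and expiry handling; subclasses supply the request URLs
+ * and the parsing of the response body.
+ * <p/>
+ * Author: Sebastian Schaffert
+ */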
+public abstract class AbstractHttpProvider implements DataProvider {
+
+ private static Logger log = LoggerFactory.getLogger(AbstractHttpProvider.class);
+
+ /**
+ * Build the URL to use to call the webservice in order to retrieve the data for the resource passed as argument.
+ * In many cases, this will just return the URI of the resource (e.g. Linked Data), but there might be data providers
+ * that use different means for accessing the data for a resource, e.g. SPARQL or a Cache.
+ *
+ * @param resourceUri the URI of the resource to retrieve
+ * @param endpoint endpoint configuration for the data provider (optional)
+ * @return the initial request URLs; {@code retrieveResource} queues them and requests each one not yet visited
+ * @throws DataRetrievalException declared for implementations that cannot build the request URLs
+ */
+ protected abstract List<String> buildRequestUrl(String resourceUri, Endpoint endpoint) throws DataRetrievalException;
+
+ /**
+ * Parse the HTTP response entity returned by the web service call and return its contents in a Sesame RDF
+ * repository also passed as argument. The content type returned by the web service is passed as argument to help
+ * the implementation decide how to parse the data. The implementation can return a list of additional pages to
+ * retrieve for completing the data of the resource
+ *
+ * @param resourceUri the URI of the resource being retrieved
+ * @param requestUrl the URL that was requested to obtain this response
+ * @param repository an RDF repository for storing an RDF representation of the dataset located at the remote resource.
+ * @param in input stream as returned by the remote webservice
+ * @param contentType content type as returned in the HTTP headers of the remote webservice
+ * @return a possibly empty list of URLs of additional resources to retrieve to complete the content
+ * @throws DataRetrievalException in case an error occurs while reading or parsing the input stream
+ */
+ protected abstract List<String> parseResponse(String resourceUri, String requestUrl, Repository repository, InputStream in, String contentType) throws DataRetrievalException;
+
+    /**
+     * Retrieve the data for a resource using the HTTP client of the given client service and the endpoint
+     * definition. Starting with the URLs returned by {@link #buildRequestUrl}, every URL is requested at most
+     * once; each response may contribute further URLs to request. All parsed triples end up in one in-memory
+     * repository that is returned together with the expiry date.
+     * <p/>
+     * The expiry date is taken from the first parseable "Expires" header seen, falls back to the default expiry
+     * of the endpoint (or of the client configuration), and is never earlier than the configured minimum expiry.
+     *
+     * @param resource the resource to be retrieved
+     * @param client   the client service providing the HTTP client and the client configuration
+     * @param endpoint the endpoint definition (may be null)
+     * @return a completely specified client response, including expiry information and the set of triples
+     * @throws DataRetrievalException if the repository cannot be set up, an HTTP or I/O error occurs, or any
+     *                                runtime exception is raised during retrieval
+     */
+    @Override
+    public ClientResponse retrieveResource(String resource, LDClientService client, Endpoint endpoint) throws DataRetrievalException {
+
+        try {
+
+            // value of the Accept header: content types of the endpoint if given, otherwise those of the provider
+            String contentType;
+            if(endpoint != null && endpoint.getContentTypes().size() > 0) {
+                contentType = CollectionUtils.fold(endpoint.getContentTypes(), new CollectionUtils.StringSerializer<ContentType>() {
+                    @Override
+                    public String serialize(ContentType contentType) {
+                        return contentType.toString("q");
+                    }
+                },",");
+            } else {
+                contentType = CollectionUtils.fold(Arrays.asList(listMimeTypes()), ",");
+            }
+
+            long defaultExpires = client.getClientConfiguration().getDefaultExpiry();
+            if(endpoint != null && endpoint.getDefaultExpiry() != null) {
+                defaultExpires = endpoint.getDefaultExpiry();
+            }
+
+            final ResponseHandler handler = new ResponseHandler(resource, endpoint);
+
+            // a queue for queuing the request URLs needed to build the query response
+            Queue<String> requestUrls = new LinkedList<String>();
+            requestUrls.addAll(buildRequestUrl(resource, endpoint));
+
+            // URLs that were already requested successfully; guards against request cycles
+            Set<String> visited = new HashSet<String>();
+
+            String requestUrl = requestUrls.poll();
+            while(requestUrl != null) {
+
+                if(!visited.contains(requestUrl)) {
+                    HttpGet get = new HttpGet(requestUrl);
+                    try {
+                        get.setHeader("Accept",contentType);
+                        get.setHeader("Accept-Language", "*"); // PoolParty compatibility
+
+                        log.info("retrieving resource data for {} from '{}' endpoint, request URI is <{}>", new Object[] {resource, getName(), get.getURI().toASCIIString()});
+
+                        // the handler parses the response into its repository and reports follow-up URLs
+                        handler.requestUrl = requestUrl;
+                        List<String> additionalRequestUrls = client.getClient().execute(get, handler);
+                        requestUrls.addAll(additionalRequestUrls);
+
+                        visited.add(requestUrl);
+                    } finally {
+                        get.releaseConnection();
+                    }
+                }
+
+                requestUrl = requestUrls.poll();
+            }
+
+            Date expiresDate = handler.expiresDate;
+            if (expiresDate == null) {
+                expiresDate = new Date(System.currentTimeMillis() + defaultExpires * 1000);
+            }
+
+            // 1000L forces long arithmetic so that a large minimum expiry cannot overflow
+            long minExpires = System.currentTimeMillis() + client.getClientConfiguration().getMinimumExpiry() * 1000L;
+            if (expiresDate.getTime() < minExpires) {
+                log.info("expiry time returned by request lower than minimum expiration time; using minimum time instead");
+                expiresDate = new Date(minExpires);
+            }
+
+            if(log.isInfoEnabled()) {
+                RepositoryConnection con = handler.triples.getConnection();
+                try {
+                    log.info("retrieved {} triples for resource {}; expiry date: {}",new Object[] {con.size(),resource,expiresDate});
+                } finally {
+                    // close the connection even if counting the triples fails
+                    con.close();
+                }
+            }
+
+            ClientResponse result = new ClientResponse(handler.triples);
+            result.setExpires(expiresDate);
+            return result;
+        } catch (RepositoryException e) {
+            log.error("error while initialising Sesame repository; classpath problem?",e);
+            throw new DataRetrievalException("error while initialising Sesame repository; classpath problem?",e);
+        } catch (ClientProtocolException e) {
+            log.error("HTTP client error while trying to retrieve resource {}: {}", resource, e.getMessage());
+            throw new DataRetrievalException("I/O error while trying to retrieve resource "+resource,e);
+        } catch (IOException e) {
+            log.error("I/O error while trying to retrieve resource {}: {}", resource, e.getMessage());
+            throw new DataRetrievalException("I/O error while trying to retrieve resource "+resource,e);
+        } catch(RuntimeException ex) {
+            log.error("Unknown error while trying to retrieve resource {}: {}", resource, ex.getMessage());
+            throw new DataRetrievalException("Unknown error while trying to retrieve resource "+resource,ex);
+        }
+
+    }
+
+
+
+ /**
+ * Check whether the content type returned by the server is acceptable to the endpoint and data provider
+ */
+ protected boolean isValidContentType(String contentType, Endpoint endpoint) {
+ if(endpoint != null && endpoint.getContentTypes().size() > 0) {
+ ContentType parsed = parseContentType(contentType);
+ for(ContentType valid : endpoint.getContentTypes()) {
+ if(valid.matches(parsed) || valid.matchesWildcard(parsed)) {
+ return true;
+ }
+ }
+ return false;
+ } else {
+ // TODO: should probably be removed, since it is not used
+ for(String type : listMimeTypes()) {
+ if(type.split(";")[0].equalsIgnoreCase(contentType)) return true;
+ }
+ return false;
+ }
+ }
+
+    /**
+     * HTTP response handler shared by all requests of one retrieval: it parses every response body into the
+     * same in-memory repository, remembers the first parseable "Expires" date and returns the additional
+     * URLs that the parser asks for.
+     */
+    private class ResponseHandler implements org.apache.http.client.ResponseHandler<List<String>> {
+
+        // expiry date taken from the first response carrying a parseable Expires header; null until then
+        private Date expiresDate;
+
+        // URL of the request currently being executed; set by retrieveResource before each request
+        private String requestUrl;
+
+        // the repository where the triples will be stored in case the data providers return them
+        private final Repository triples;
+
+        private final Endpoint endpoint;
+
+        private final String resource;
+
+        /**
+         * @param resource the resource being retrieved
+         * @param endpoint the endpoint definition (may be null)
+         * @throws RepositoryException if the in-memory repository cannot be initialised
+         */
+        public ResponseHandler(String resource, Endpoint endpoint) throws RepositoryException {
+            this.resource = resource;
+            this.endpoint = endpoint;
+
+            triples = new SailRepository(new MemoryStore());
+            triples.initialize();
+        }
+
+        /**
+         * Parse a response with a status of 2xx or 3xx into the repository.
+         *
+         * @param response the HTTP response to process
+         * @return the possibly empty list of additional URLs to request
+         * @throws ClientProtocolException if the response status is outside of the 200-399 range
+         * @throws IOException             if the response has no body, its content type is not acceptable,
+         *                                 or the body cannot be parsed
+         */
+        @Override
+        public List<String> handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
+            ArrayList<String> requestUrls = new ArrayList<String>();
+
+            final int status = response.getStatusLine().getStatusCode();
+            if (status >= 200 && status < 400) {
+                final HttpEntity entity = response.getEntity();
+                if (entity == null)
+                    throw new IOException("no content returned by Linked Data resource " + resource);
+
+                // The Content-Type header is optional; without it there is nothing to validate and the
+                // default parse type below is used instead of failing with a NullPointerException.
+                final Header contentTypeHeader = entity.getContentType();
+                final String mimeType = contentTypeHeader != null ? contentTypeHeader.getValue().split(";")[0] : null;
+
+                if (mimeType != null && !isValidContentType(mimeType, endpoint)) {
+                    // FIXME: here was get.abort()
+                    throw new IOException("invalid content returned by Linked Data resource " + resource + ": "
+                            + contentTypeHeader.getValue());
+                }
+
+                String parseContentType = "application/rdf+xml";
+                if (endpoint != null && "SPARQL".equals(endpoint.getType())) {
+                    parseContentType = "application/sparql-results+xml";
+                } else if (mimeType != null) {
+                    parseContentType = mimeType;
+                }
+
+                InputStream in = entity.getContent();
+                try {
+
+                    List<String> urls = parseResponse(resource, requestUrl, triples, in, parseContentType);
+                    requestUrls.addAll(urls);
+
+                    // only the first Expires header of a retrieval is taken into account
+                    if (expiresDate == null) {
+                        Header expires = response.getFirstHeader("Expires");
+                        if (expires != null) {
+                            try {
+                                expiresDate = DateUtils.parseDate(expires.getValue());
+                            } catch (DateParseException e) {
+                                log.debug("error parsing Expires: header");
+                            }
+                        }
+                    }
+
+                } catch (DataRetrievalException e) {
+                    // FIXME: get.abort();
+                    throw new IOException(e);
+                } finally {
+                    in.close();
+                }
+                EntityUtils.consume(entity);
+            } else {
+                log.error("the HTTP request failed (status: {})", response.getStatusLine());
+                throw new ClientProtocolException("the HTTP request failed (status: " + response.getStatusLine() + ")");
+            }
+
+            return requestUrls;
+        }
+
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/BlockingProvider.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/BlockingProvider.java b/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/BlockingProvider.java
new file mode 100644
index 0000000..0388985
--- /dev/null
+++ b/ldclient/ldclient-core/src/main/java/org/apache/marmotta/ldclient/services/provider/BlockingProvider.java
@@ -0,0 +1,92 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.services.provider;
+
+import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
+import org.apache.marmotta.ldclient.api.ldclient.LDClientService;
+import org.apache.marmotta.ldclient.api.provider.DataProvider;
+import org.apache.marmotta.ldclient.exception.DataRetrievalException;
+import org.apache.marmotta.ldclient.model.ClientResponse;
+import org.openrdf.repository.Repository;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.repository.sail.SailRepository;
+import org.openrdf.sail.memory.MemoryStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Date;
+
+/**
+ * Data provider that blocks retrieval: it issues no request and answers every resource with an empty repository.
+ * <p/>
+ * Author: Sebastian Schaffert (sschaffert@apache.org)
+ */
+public class BlockingProvider implements DataProvider {
+
+
+ public static final String PROVIDER_NAME = "NONE";
+
+ private static Logger log = LoggerFactory.getLogger(BlockingProvider.class);
+
+ private static final Repository empty_repository = new SailRepository(new MemoryStore());
+ static {
+ try {
+ empty_repository.initialize();
+ } catch (RepositoryException e) {
+ }
+ }
+
+ /**
+ * Return the name of this data provider. To be used e.g. in the configuration and in log messages.
+ *
+ * @return
+ */
+ @Override
+ public String getName() {
+ return PROVIDER_NAME;
+ }
+
+ /**
+ * Return the list of mime types accepted by this data provider.
+ *
+ * @return
+ */
+ @Override
+ public String[] listMimeTypes() {
+ return new String[0];
+ }
+
+ /**
+ * Retrieve the data for a resource using the given http client and endpoint definition. The service is
+ * supposed to manage the connection handling itself. See AbstractHttpProvider
+ * for a generic implementation of this method.
+ *
+ * @param resource the resource to be retrieved
+ * @param endpoint the endpoint definition
+ * @return a completely specified client response, including expiry information and the set of triples
+ */
+ @Override
+ public ClientResponse retrieveResource(String resource, LDClientService client, Endpoint endpoint) throws DataRetrievalException {
+ log.info("blocked retrieval of resource {}", resource);
+
+ long defaultExpires = client.getClientConfiguration().getDefaultExpiry();
+ Date expiresDate = new Date(System.currentTimeMillis() + defaultExpires * 1000);
+
+ ClientResponse result = new ClientResponse(empty_repository);
+ result.setExpires(expiresDate);
+ return result;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/main/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/main/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider b/ldclient/ldclient-core/src/main/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider
new file mode 100644
index 0000000..b0d1696
--- /dev/null
+++ b/ldclient/ldclient-core/src/main/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider
@@ -0,0 +1 @@
+org.apache.marmotta.ldclient.services.provider.BlockingProvider
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyEndpoint.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyEndpoint.java b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyEndpoint.java
new file mode 100644
index 0000000..caa1b97
--- /dev/null
+++ b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyEndpoint.java
@@ -0,0 +1,12 @@
+package org.apache.marmotta.ldclient.dummy;
+
+import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
+
+public class DummyEndpoint extends Endpoint {
+
+    /** Creates the "Dummy" endpoint with {@code PRIORITY_HIGH}; see {@code Endpoint} for the constructor arguments. */
+    public DummyEndpoint() {
+        super("Dummy", "Dummy", "^http://127.1.2.3", null, 86400L);
+        setPriority(PRIORITY_HIGH);
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyProvider.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyProvider.java b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyProvider.java
new file mode 100644
index 0000000..074c8d6
--- /dev/null
+++ b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/dummy/DummyProvider.java
@@ -0,0 +1,51 @@
+package org.apache.marmotta.ldclient.dummy;
+
+import java.io.IOException;
+
+import org.apache.http.HttpResponse;
+import org.apache.http.StatusLine;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.ResponseHandler;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
+import org.apache.marmotta.ldclient.api.ldclient.LDClientService;
+import org.apache.marmotta.ldclient.api.provider.DataProvider;
+import org.apache.marmotta.ldclient.exception.DataRetrievalException;
+import org.apache.marmotta.ldclient.model.ClientResponse;
+
+public class DummyProvider implements DataProvider {
+
+    /** Handler that reduces an HTTP response to the reason phrase of its status line. */
+    private static final ResponseHandler<String> REASON_PHRASE = new ResponseHandler<String>() {
+        @Override
+        public String handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
+            StatusLine statusLine = response.getStatusLine();
+            return statusLine.getReasonPhrase();
+        }
+    };
+
+    @Override
+    public String getName() {
+        return "Dummy";
+    }
+
+    @Override
+    public String[] listMimeTypes() {
+        return new String[] {"application/dummy"};
+    }
+
+    /**
+     * Issues a GET for the resource through the client's HTTP client and discards the answer.
+     * @return always {@code null}
+     * @throws DataRetrievalException wrapping any exception raised while building or executing the request
+     */
+    @Override
+    public ClientResponse retrieveResource(String resource, LDClientService client, Endpoint endpoint) throws DataRetrievalException {
+        try {
+            client.getClient().execute(new HttpGet(resource), REASON_PHRASE);
+        } catch (Exception e) {
+            throw new DataRetrievalException(e);
+        }
+        return null;
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/TestLDClientTest.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/TestLDClientTest.java b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/TestLDClientTest.java
new file mode 100644
index 0000000..63d363f
--- /dev/null
+++ b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/TestLDClientTest.java
@@ -0,0 +1,44 @@
+package org.apache.marmotta.ldclient.test;
+
+import org.apache.marmotta.ldclient.exception.DataRetrievalException;
+import org.apache.marmotta.ldclient.services.ldclient.LDClient;
+import org.apache.marmotta.ldclient.test.helper.TestLDClient;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestLDClientTest {
+
+    private TestLDClient client;
+
+    /** Wraps a fresh {@link LDClient} in the {@link TestLDClient} helper before each test. */
+    @Before
+    public void setUp() {
+        client = new TestLDClient(new LDClient());
+    }
+
+    /** Shuts the wrapped client down after each test. */
+    @After
+    public void cleanUp() {
+        client.shutdown();
+    }
+
+    @Test(expected = UnsupportedOperationException.class)
+    public void testConnectionRefused() throws Exception {
+        client.retrieveResource("http://no.host.for/this/url");
+        Assert.fail();
+    }
+
+    @Test(expected = DataRetrievalException.class)
+    public void testLocalhostInvalidPort() throws Exception {
+        client.retrieveResource("http://127.1.2.3:-1/");
+        Assert.fail();
+    }
+
+    @Test(expected = UnsupportedOperationException.class)
+    public void testMissingProvider() throws Exception {
+        client.retrieveResource("ftp://no.provider.for/this/url");
+        Assert.fail();
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/helper/TestLDClient.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/helper/TestLDClient.java b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/helper/TestLDClient.java
new file mode 100644
index 0000000..d032e73
--- /dev/null
+++ b/ldclient/ldclient-core/src/test/java/org/apache/marmotta/ldclient/test/helper/TestLDClient.java
@@ -0,0 +1,209 @@
+package org.apache.marmotta.ldclient.test.helper;
+
+import java.io.IOException;
+import java.net.ConnectException;
+import java.net.SocketTimeoutException;
+import java.net.UnknownHostException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import org.apache.http.client.HttpClient;
+import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
+import org.apache.marmotta.ldclient.api.ldclient.LDClientService;
+import org.apache.marmotta.ldclient.api.provider.DataProvider;
+import org.apache.marmotta.ldclient.exception.DataRetrievalException;
+import org.apache.marmotta.ldclient.model.ClientConfiguration;
+import org.apache.marmotta.ldclient.model.ClientResponse;
+import org.junit.Assume;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This is a simple wrapper to use in UnitTests, which handles typical
+ * Exceptions when contacting remote resources.
+ *
+ * All methods except {@link #retrieveResource(String)} are relayed to the
+ * delegate. {@link #retrieveResource(String)} checks for common retrieval
+ * errors such as "IOException: Connection refused" and deactivates any ongoing
+ * Unit-Test using {@link Assume}.
+ *
+ */
+public class TestLDClient implements LDClientService {
+
+    private static final Logger log = LoggerFactory.getLogger(TestLDClient.class);
+
+    /**
+     * Checks that are applied unless default checks are switched off; they were taken from Apache Stanbol.
+     * @see <a href="http://svn.apache.org/repos/asf/stanbol/trunk/enhancer/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/RemoteServiceHelper.java">http://svn.apache.org/repos/asf/stanbol/trunk/enhancer/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/RemoteServiceHelper.java</a>
+     */
+    public static final List<Check> DEFAULT_CHECKS;
+    static {
+        final LinkedList<Check> defaults = new LinkedList<Check>();
+        defaults.add(new Check(UnknownHostException.class));
+        defaults.add(new Check(SocketTimeoutException.class));
+        defaults.add(new Check(IOException.class, "Connection refused"));
+        defaults.add(new Check(IOException.class, "Server returned HTTP response code: 50"));
+        defaults.add(new Check(ConnectException.class, "unreachable"));
+        DEFAULT_CHECKS = Collections.unmodifiableList(defaults);
+    }
+
+    private final LDClientService delegate;
+    private final List<Check> extraCheck;
+    private boolean defaultChecks = true;
+
+    /**
+     * Wraps the delegate using the default checks only.
+     *
+     * @param delegate the client all calls are relayed to
+     */
+    public TestLDClient(LDClientService delegate) {
+        this(delegate, new LinkedList<Check>(), true);
+    }
+
+    /**
+     * Wraps the delegate using the default checks plus the given extra checks.
+     *
+     * @param delegate    the client all calls are relayed to
+     * @param extraChecks additional checks; the list is stored as passed, not copied
+     */
+    public TestLDClient(LDClientService delegate, LinkedList<Check> extraChecks) {
+        this(delegate, extraChecks, true);
+    }
+
+    /**
+     * Wraps the delegate using the given extra checks and, optionally, the default checks.
+     *
+     * @param delegate      the client all calls are relayed to
+     * @param extraChecks   additional checks; the list is stored as passed, not copied
+     * @param defaultChecks whether {@link #DEFAULT_CHECKS} are applied as well
+     */
+    public TestLDClient(LDClientService delegate, LinkedList<Check> extraChecks, boolean defaultChecks) {
+        this.delegate = delegate;
+        this.extraCheck = extraChecks;
+        this.defaultChecks = defaultChecks;
+    }
+
+    @Override
+    public boolean ping(String resource) {
+        return delegate.ping(resource);
+    }
+
+    /**
+     * Relays the retrieval to the delegate. A {@link DataRetrievalException} raised by the delegate is first
+     * handed to the default checks (when enabled) and then to the extra checks; if no check intervenes, the
+     * exception is re-thrown unchanged.
+     */
+    @Override
+    public ClientResponse retrieveResource(String resource) throws DataRetrievalException {
+        try {
+            return delegate.retrieveResource(resource);
+        } catch (final DataRetrievalException failure) {
+            if (defaultChecks) {
+                applyAll(DEFAULT_CHECKS, failure);
+            }
+            applyAll(extraCheck, failure);
+            throw failure;
+        }
+    }
+
+    /** Hands the failure to every check of the list, in list order. */
+    private static void applyAll(List<Check> checks, DataRetrievalException failure) throws DataRetrievalException {
+        for (Check check : checks) {
+            check.matches(failure);
+        }
+    }
+
+    @Override
+    public HttpClient getClient() {
+        return delegate.getClient();
+    }
+
+    @Override
+    public ClientConfiguration getClientConfiguration() {
+        return delegate.getClientConfiguration();
+    }
+
+    @Override
+    public Endpoint getEndpoint(String resource) {
+        return delegate.getEndpoint(resource);
+    }
+
+    @Override
+    public boolean hasEndpoint(String urlPattern) {
+        return delegate.hasEndpoint(urlPattern);
+    }
+
+    @Override
+    public void shutdown() {
+        delegate.shutdown();
+    }
+
+    /**
+     * Return a collection of all available data providers (i.e. registered through the service loader).
+     *
+     * @return the data providers reported by the delegate
+     */
+    @Override
+    public Set<DataProvider> getDataProviders() {
+        return delegate.getDataProviders();
+    }
+
+    /** Appends a single check to the extra checks. */
+    public void addCheck(Check check) {
+        extraCheck.add(check);
+    }
+
+    /** Appends all given checks to the extra checks. */
+    public void addChecks(Collection<Check> checks) {
+        extraCheck.addAll(checks);
+    }
+
+    public boolean isDefaultChecks() {
+        return defaultChecks;
+    }
+
+    public void setDefaultChecks(boolean defaultChecks) {
+        this.defaultChecks = defaultChecks;
+    }
+
+    /**
+     * A rule made of a throwable type and an optional message pattern. A throwable that satisfies the rule
+     * makes the running JUnit test stop through {@link Assume}.
+     */
+    public static class Check {
+        private final Class<? extends Throwable> throwable;
+        private final Pattern messagePattern;
+        private boolean checkStack = true;
+        private String infoMessage;
+
+        /** Matches any throwable of the given type, whatever its message. */
+        public Check(Class<? extends Throwable> throwable) {
+            this(throwable, (Pattern) null);
+        }
+
+        /** Matches throwables of the given type whose message contains the text, ignoring case. */
+        public Check(Class<? extends Throwable> throwable, String message) {
+            this(throwable, Pattern.compile(Pattern.quote(message), Pattern.CASE_INSENSITIVE));
+            this.infoMessage = String.format("Ignoring because of %s(\"%s\")", throwable.getSimpleName(), message);
+        }
+
+        /** Matches throwables of the given type whose message contains a match of the pattern. */
+        public Check(Class<? extends Throwable> throwable, Pattern mPattern) {
+            this.throwable = throwable;
+            this.messagePattern = mPattern;
+            this.infoMessage = String.format("Ignoring because of %s", throwable.getSimpleName());
+        }
+
+        /** Sets whether the causes of a throwable are examined too (enabled by default). */
+        public Check setCheckStack(boolean checkStack) {
+            this.checkStack = checkStack;
+            return this;
+        }
+
+        /**
+         * Check if the provided parameter matches this check.
+         * If so, the current JUnit test is ignored ({@link Assume}) and the parameter exception is re-thrown.
+         * @param t the {@link Throwable} to check.
+         */
+        public <T extends Throwable> void matches(T t) throws T {
+            matches(t, t);
+        }
+
+        /**
+         * Tests {@code toCheck} against this rule and, when cause checking is enabled, continues with its cause.
+         * {@code t} is the throwable originally handed in; it is the one reported and re-thrown on a match.
+         */
+        private <T extends Throwable> void matches(T t, Throwable toCheck) throws T {
+            if (toCheck == null) {
+                return;
+            }
+
+            boolean hit = throwable.isInstance(toCheck);
+            if (hit && messagePattern != null) {
+                // a throwable without a message can never satisfy a message pattern
+                final String text = toCheck.getMessage();
+                hit = text != null && messagePattern.matcher(text).find();
+            }
+            if (hit) {
+                log.info("Ignoring test because '{}' ({})", getMessage(), t.getMessage());
+                Assume.assumeNoException(getMessage(), t);
+                throw t;
+            }
+
+            if (checkStack) {
+                matches(t, toCheck.getCause());
+            }
+        }
+
+        public String getMessage() {
+            return infoMessage;
+        }
+
+        public Check setMessage(String infoMessage) {
+            this.infoMessage = infoMessage;
+            return this;
+        }
+
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.endpoint.Endpoint
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.endpoint.Endpoint b/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.endpoint.Endpoint
new file mode 100644
index 0000000..cda05f9
--- /dev/null
+++ b/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.endpoint.Endpoint
@@ -0,0 +1 @@
+org.apache.marmotta.ldclient.dummy.DummyEndpoint
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider b/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider
new file mode 100644
index 0000000..2b6fcfc
--- /dev/null
+++ b/ldclient/ldclient-core/src/test/resources/META-INF/services/org.apache.marmotta.ldclient.api.provider.DataProvider
@@ -0,0 +1 @@
+org.apache.marmotta.ldclient.dummy.DummyProvider
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/.classpath
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/.classpath b/ldclient/ldclient-provider-html/.classpath
new file mode 100644
index 0000000..1b28ee5
--- /dev/null
+++ b/ldclient/ldclient-provider-html/.classpath
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" output="target/classes" path="src/main/java"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
+ <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
+ <attributes>
+ <attribute name="org.eclipse.jst.component.nondependency" value=""/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="output" path="target/classes"/>
+</classpath>
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/.project
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/.project b/ldclient/ldclient-provider-html/.project
new file mode 100644
index 0000000..b7daee3
--- /dev/null
+++ b/ldclient/ldclient-provider-html/.project
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>ldclient-provider-html</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.wst.common.project.facet.core.builder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.m2e.core.maven2Builder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.wst.validation.validationbuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jem.workbench.JavaEMFNature</nature>
+ <nature>org.eclipse.wst.common.modulecore.ModuleCoreNature</nature>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ <nature>org.eclipse.m2e.core.maven2Nature</nature>
+ <nature>org.eclipse.wst.common.project.facet.core.nature</nature>
+ </natures>
+</projectDescription>
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/.settings/org.eclipse.core.resources.prefs
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/.settings/org.eclipse.core.resources.prefs b/ldclient/ldclient-provider-html/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 0000000..2b76340
--- /dev/null
+++ b/ldclient/ldclient-provider-html/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,2 @@
+eclipse.preferences.version=1
+encoding//src/main/java=UTF-8
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/.settings/org.eclipse.jdt.core.prefs
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/.settings/org.eclipse.jdt.core.prefs b/ldclient/ldclient-provider-html/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..69c31cd
--- /dev/null
+++ b/ldclient/ldclient-provider-html/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,8 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
+org.eclipse.jdt.core.compiler.compliance=1.6
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.6
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/.settings/org.eclipse.m2e.core.prefs
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/.settings/org.eclipse.m2e.core.prefs b/ldclient/ldclient-provider-html/.settings/org.eclipse.m2e.core.prefs
new file mode 100644
index 0000000..f897a7f
--- /dev/null
+++ b/ldclient/ldclient-provider-html/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.component
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.component b/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.component
new file mode 100644
index 0000000..7e180a5
--- /dev/null
+++ b/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.component
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project-modules id="moduleCoreId" project-version="1.5.0">
+ <wb-module deploy-name="ldclient-provider-html">
+ <wb-resource deploy-path="/" source-path="/src/main/java"/>
+ </wb-module>
+</project-modules>
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.project.facet.core.xml
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.project.facet.core.xml b/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.project.facet.core.xml
new file mode 100644
index 0000000..c78d932
--- /dev/null
+++ b/ldclient/ldclient-provider-html/.settings/org.eclipse.wst.common.project.facet.core.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<faceted-project>
+ <installed facet="java" version="1.6"/>
+ <installed facet="jst.utility" version="1.0"/>
+</faceted-project>
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/pom.xml
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/pom.xml b/ldclient/ldclient-provider-html/pom.xml
new file mode 100644
index 0000000..f6415c8
--- /dev/null
+++ b/ldclient/ldclient-provider-html/pom.xml
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Copyright (c) 2013 Salzburg Research.
+ ~
+ ~ Licensed under the Apache License, Version 2.0 (the "License");
+ ~ you may not use this file except in compliance with the License.
+ ~ You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>at.newmedialab.lmf</groupId>
+ <artifactId>ldclient-parent</artifactId>
+ <version>3.0.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>ldclient-provider-html</artifactId>
+ <name>LDClient Provider: HTML Resource Access</name>
+
+ <description>
+ Provides basic support for accessing HTML resources as Linked Data. This package only offers abstract classes
+ that need to be subclassed for concrete cases.
+ </description>
+
+ <dependencies>
+ <dependency>
+ <groupId>at.newmedialab.lmf</groupId>
+ <artifactId>ldclient-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>at.newmedialab.lmf</groupId>
+ <artifactId>ldclient-core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.jsoup</groupId>
+ <artifactId>jsoup</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>at.newmedialab.sesame</groupId>
+ <artifactId>sesame-commons</artifactId>
+ </dependency>
+
+ </dependencies>
+
+</project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/AbstractHTMLDataProvider.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/AbstractHTMLDataProvider.java b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/AbstractHTMLDataProvider.java
new file mode 100644
index 0000000..0d63dc2
--- /dev/null
+++ b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/AbstractHTMLDataProvider.java
@@ -0,0 +1,140 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.provider.html;
+
+import at.newmedialab.sesame.commons.model.Namespaces;
+import org.apache.marmotta.ldclient.api.provider.DataProvider;
+import org.apache.marmotta.ldclient.exception.DataRetrievalException;
+import org.apache.marmotta.ldclient.provider.html.mapping.JSoupMapper;
+import org.apache.marmotta.ldclient.services.provider.AbstractHttpProvider;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.openrdf.model.Resource;
+import org.openrdf.model.Statement;
+import org.openrdf.model.URI;
+import org.openrdf.model.Value;
+import org.openrdf.model.ValueFactory;
+import org.openrdf.repository.Repository;
+import org.openrdf.repository.RepositoryConnection;
+import org.openrdf.repository.RepositoryException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Generic implementation of an HTML data provider capable of mapping selected elements of HTML documents to
+ * RDF metadata properties. The HTML data provider parses the potentially messy HTML with jsoup and then applies
+ * the configured {@link JSoupMapper}s to the parsed document to produce the RDF statements.
+ * <p/>
+ * Author: Sebastian Schaffert
+ */
+public abstract class AbstractHTMLDataProvider extends AbstractHttpProvider implements DataProvider {
+
+    /** Finds a {@code charset=...} parameter inside a content type string; compiled once and reused. */
+    private static final Pattern CHARSET_PATTERN = Pattern.compile("charset=([a-zA-Z0-9-_]+)");
+
+    /**
+     * Return a list of URIs that should be added as types for each processed resource.
+     *
+     * @param resource the resource being processed
+     * @return the URIs to add as {@code rdf:type} values of the resource
+     */
+    protected abstract List<String> getTypes(URI resource);
+
+    /**
+     * Try to find further URLs in the document that need to be requested to complete the resource
+     * data.
+     * Used e.g. to parse the result of paging in HTML pages. The default implementation returns an
+     * empty list.
+     *
+     * @param resource   the resource being retrieved
+     * @param document   the parsed HTML document
+     * @param requestUrl the URL the document was requested from
+     * @return the additional URLs to request; empty by default
+     */
+    protected List<String> findAdditionalRequestUrls(String resource, Document document, String requestUrl) {
+        return Collections.emptyList();
+    }
+
+    /**
+     * Parse the HTTP response entity returned by the web service call as HTML and add its RDF representation
+     * to the given repository. For every mapping, the mapper selects elements of the document and turns each
+     * of them into values that become objects of statements about the resource; afterwards one
+     * {@code rdf:type} statement is added per URI returned by {@link #getTypes(URI)}.
+     *
+     * @param resource    the subject of the data retrieval
+     * @param requestUrl  the URL that was requested; passed to jsoup as the base URI of the document
+     * @param triples     the repository the generated statements are added to
+     * @param in          input stream as returned by the remote webservice
+     * @param contentType content type as returned in the HTTP headers of the remote webservice; a
+     *                    {@code charset} parameter, if present, is used to decode the stream
+     * @return the additional URLs reported by {@link #findAdditionalRequestUrls(String, Document, String)}
+     * @throws DataRetrievalException in case reading the input stream or writing to the repository fails
+     */
+    @Override
+    public List<String> parseResponse(String resource, String requestUrl, Repository triples, InputStream in, String contentType) throws DataRetrievalException {
+        // stays null when no charset is given; jsoup then determines the encoding itself
+        String charset = null;
+        if (contentType != null) {
+            Matcher matcher = CHARSET_PATTERN.matcher(contentType);
+            if (matcher.find()) {
+                charset = matcher.group(1);
+            }
+        }
+
+        try {
+            Document htmlDoc = Jsoup.parse(in, charset, requestUrl);
+
+            RepositoryConnection con = triples.getConnection();
+            try {
+                ValueFactory vf = con.getValueFactory();
+                URI subject = vf.createURI(resource);
+
+                for (Map.Entry<String, JSoupMapper> mapping : getMappings(resource, requestUrl).entrySet()) {
+                    URI predicate = vf.createURI(mapping.getKey());
+
+                    final Elements values = mapping.getValue().select(htmlDoc);
+                    for (Element value : values) {
+                        List<Value> objects = mapping.getValue().map(resource, value, vf);
+                        for (Value object : objects) {
+                            Statement stmt = vf.createStatement(subject, predicate, object);
+                            con.add(stmt);
+                        }
+                    }
+                }
+
+                URI ptype = vf.createURI(Namespaces.NS_RDF + "type");
+
+                for (String typeUri : getTypes(subject)) {
+                    Resource type_resource = vf.createURI(typeUri);
+                    con.add(vf.createStatement(subject, ptype, type_resource));
+                }
+
+                con.commit();
+            } finally {
+                // release the connection even when a mapper or the repository fails half-way
+                con.close();
+            }
+
+            return findAdditionalRequestUrls(resource, htmlDoc, requestUrl);
+
+        } catch (RepositoryException e) {
+            throw new DataRetrievalException("repository error while parsing HTML response", e);
+        } catch (IOException e) {
+            throw new DataRetrievalException("I/O error while parsing HTML response", e);
+        }
+    }
+
+    /**
+     * Return the mappings to apply to the document of a resource.
+     *
+     * @param resource   the resource being retrieved
+     * @param requestUrl the URL the document was requested from
+     * @return a map whose keys are predicate URIs and whose values are the mappers producing the objects
+     */
+    protected abstract Map<String, JSoupMapper> getMappings(String resource, String requestUrl);
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssDateLiteralMapper.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssDateLiteralMapper.java b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssDateLiteralMapper.java
new file mode 100644
index 0000000..6d8b129
--- /dev/null
+++ b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssDateLiteralMapper.java
@@ -0,0 +1,96 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.provider.html.mapping;
+
+import at.newmedialab.sesame.commons.model.Namespaces;
+import at.newmedialab.sesame.commons.util.DateUtils;
+import org.jsoup.nodes.Element;
+import org.openrdf.model.Value;
+import org.openrdf.model.ValueFactory;
+
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Collections;
+import java.util.Date;
+import java.util.List;
+
+/**
+ * Maps the text content of the selected elements to typed XSD date/time literals.
+ * <p>
+ * The trimmed element text is parsed into a {@link Date}, either with the format configured via
+ * {@code setParseFormat} or, when none is configured, with {@code DateUtils.parseDate}. For the
+ * datatypes {@code dateTime}, {@code date} and {@code time} the parsed date is re-serialised
+ * with the matching {@code DateUtils} ISO 8601 formatter; for any other datatype the trimmed
+ * element text itself is used as the lexical value. Elements whose text cannot be turned into a
+ * date produce no value at all.
+ * <p>
+ * NOTE(review): {@link DateFormat} instances are not thread-safe; confirm that a mapper with a
+ * configured parse format is not shared between threads.
+ */
+public class CssDateLiteralMapper extends CssTextLiteralMapper {
+
+    /** Explicit parse format; while {@code null}, parsing is delegated to {@code DateUtils}. */
+    protected DateFormat format = null;
+
+    /**
+     * Creates a mapper for the given CSS selector that emits {@code dateTime} literals.
+     *
+     * @param cssSelector CSS query selecting the elements to map
+     */
+    public CssDateLiteralMapper(String cssSelector) {
+        this(cssSelector, "dateTime");
+    }
+
+    /**
+     * Creates a mapper for the given CSS selector and XSD datatype.
+     *
+     * @param cssSelector CSS query selecting the elements to map
+     * @param datatype local name of the XSD datatype, appended to the XSD namespace
+     */
+    public CssDateLiteralMapper(String cssSelector, String datatype) {
+        super(cssSelector, datatype);
+    }
+
+    /**
+     * Creates a mapper for the given selector that emits {@code dateTime} literals.
+     *
+     * @param selector strategy selecting the elements to map
+     */
+    public CssDateLiteralMapper(Selector selector) {
+        this(selector, "dateTime");
+    }
+
+    /**
+     * Creates a mapper for the given selector and XSD datatype.
+     *
+     * @param selector strategy selecting the elements to map
+     * @param datatype local name of the XSD datatype, appended to the XSD namespace
+     */
+    public CssDateLiteralMapper(Selector selector, String datatype) {
+        super(selector, datatype);
+    }
+
+    /**
+     * Configures the pattern used to parse element texts.
+     *
+     * @param format a {@link SimpleDateFormat} pattern
+     * @return this mapper, to allow call chaining
+     */
+    public CssDateLiteralMapper setParseFormat(String format) {
+        this.format = new SimpleDateFormat(format);
+        return this;
+    }
+
+    /**
+     * Configures the format used to parse element texts.
+     *
+     * @param format the format to parse with; {@code null} switches back to {@code DateUtils}
+     * @return this mapper, to allow call chaining
+     */
+    public CssDateLiteralMapper setParseFormat(DateFormat format) {
+        this.format = format;
+        return this;
+    }
+
+    /**
+     * Converts the text of {@code elem} into a single typed literal.
+     *
+     * @param resourceUri URI of the resource being processed (not used by this mapper)
+     * @param elem the selected element whose text is interpreted as a date
+     * @param factory factory used to create the literal and its datatype URI
+     * @return a one-element list with the literal, or an empty list if no date could be parsed
+     */
+    @Override
+    public List<Value> map(String resourceUri, Element elem, ValueFactory factory) {
+        final String text = elem.text().trim();
+        final Date parsed = parseDate(text);
+        if (parsed == null) {
+            return Collections.emptyList();
+        }
+
+        final String lexical;
+        if ("dateTime".equals(datatype)) {
+            lexical = DateUtils.ISO8601FORMAT.format(parsed);
+        } else if ("date".equals(datatype)) {
+            lexical = DateUtils.ISO8601FORMAT_DATE.format(parsed);
+        } else if ("time".equals(datatype)) {
+            lexical = DateUtils.ISO8601FORMAT_TIME.format(parsed);
+        } else {
+            // unknown datatype: keep the text as found in the document
+            lexical = text;
+        }
+
+        final Value literal = factory.createLiteral(lexical, factory.createURI(Namespaces.NS_XSD + datatype));
+        return Collections.singletonList(literal);
+    }
+
+    /**
+     * Parses the given text with the configured format, or with {@code DateUtils.parseDate}
+     * when no format has been configured.
+     *
+     * @param value the text to parse
+     * @return the parsed date; {@code null} if the configured format rejects the text
+     */
+    protected Date parseDate(final String value) {
+        if (format == null) {
+            return DateUtils.parseDate(value);
+        }
+        try {
+            return format.parse(value);
+        } catch (ParseException e) {
+            return null;
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssLiteralAttrMapper.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssLiteralAttrMapper.java b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssLiteralAttrMapper.java
new file mode 100644
index 0000000..06f9987
--- /dev/null
+++ b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssLiteralAttrMapper.java
@@ -0,0 +1,89 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.provider.html.mapping;
+
+import at.newmedialab.sesame.commons.model.Namespaces;
+import org.apache.commons.lang.StringUtils;
+import org.jsoup.nodes.Element;
+import org.openrdf.model.Value;
+import org.openrdf.model.ValueFactory;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+
+/**
+ * Maps the value of an attribute of the selected elements to an RDF literal.
+ * <p>
+ * Depending on how the mapper was constructed the literal is language-tagged, typed with an XSD
+ * datatype, or plain. Blank attribute values produce no literal.
+ */
+public class CssLiteralAttrMapper extends CssSelectorMapper {
+
+    /** Name of the attribute whose value is read. */
+    protected final String attr;
+    /** Local name of the XSD datatype, or {@code null} for untyped literals. */
+    protected final String datatype;
+    /** Language of the literal, or {@code null} for literals without language tag. */
+    protected final Locale language;
+
+    private CssLiteralAttrMapper(String cssSelector, String attr, Locale lang, String datatype) {
+        super(cssSelector);
+        this.attr = attr;
+        this.language = lang;
+        this.datatype = datatype;
+    }
+
+    private CssLiteralAttrMapper(Selector selector, String attr, Locale lang, String datatype) {
+        super(selector);
+        this.attr = attr;
+        this.language = lang;
+        this.datatype = datatype;
+    }
+
+    /**
+     * Creates a mapper emitting language-tagged literals.
+     *
+     * @param cssSelector CSS query selecting the elements to map
+     * @param attr name of the attribute to read
+     * @param lang language of the literals
+     */
+    public CssLiteralAttrMapper(String cssSelector, String attr, Locale lang) {
+        this(cssSelector, attr, lang, null);
+    }
+
+    /**
+     * Creates a mapper emitting typed literals.
+     *
+     * @param cssSelector CSS query selecting the elements to map
+     * @param attr name of the attribute to read
+     * @param datatype local name of the XSD datatype, appended to the XSD namespace
+     */
+    public CssLiteralAttrMapper(String cssSelector, String attr, String datatype) {
+        this(cssSelector, attr, null, datatype);
+    }
+
+    /**
+     * Creates a mapper emitting plain literals.
+     *
+     * @param cssSelector CSS query selecting the elements to map
+     * @param attr name of the attribute to read
+     */
+    public CssLiteralAttrMapper(String cssSelector, String attr) {
+        this(cssSelector, attr, null, null);
+    }
+
+    /**
+     * Creates a mapper emitting language-tagged literals.
+     *
+     * @param selector strategy selecting the elements to map
+     * @param attr name of the attribute to read
+     * @param lang language of the literals
+     */
+    public CssLiteralAttrMapper(Selector selector, String attr, Locale lang) {
+        this(selector, attr, lang, null);
+    }
+
+    /**
+     * Creates a mapper emitting typed literals.
+     *
+     * @param selector strategy selecting the elements to map
+     * @param attr name of the attribute to read
+     * @param datatype local name of the XSD datatype, appended to the XSD namespace
+     */
+    public CssLiteralAttrMapper(Selector selector, String attr, String datatype) {
+        this(selector, attr, null, datatype);
+    }
+
+    /**
+     * Creates a mapper emitting plain literals.
+     *
+     * @param selector strategy selecting the elements to map
+     * @param attr name of the attribute to read
+     */
+    public CssLiteralAttrMapper(Selector selector, String attr) {
+        this(selector, attr, null, null);
+    }
+
+    /**
+     * Normalises the raw attribute value before it is turned into a literal; subclasses may
+     * override this to apply additional clean-up.
+     *
+     * @param value the raw attribute value
+     * @return the value with leading and trailing whitespace removed
+     */
+    protected String cleanValue(String value) {
+        return value.trim();
+    }
+
+    /**
+     * Builds a hyphen-separated language tag (e.g. {@code en-US}) from the given locale.
+     * <p>
+     * {@link Locale#toString()} separates its parts with underscores ({@code en_US}), which is
+     * not a well-formed language tag and cannot be written as {@code @en_US} in Turtle or
+     * SPARQL. The tag is assembled by hand to avoid depending on
+     * {@code Locale.toLanguageTag()}, which only exists from Java 7 on.
+     */
+    private static String languageTag(Locale locale) {
+        final StringBuilder tag = new StringBuilder();
+        final String[] parts = { locale.getLanguage(), locale.getCountry(), locale.getVariant() };
+        for (String part : parts) {
+            if (part.length() > 0) {
+                if (tag.length() > 0) {
+                    tag.append('-');
+                }
+                tag.append(part.replace('_', '-'));
+            }
+        }
+        return tag.toString();
+    }
+
+    /**
+     * Converts the configured attribute of {@code elem} into a single literal. A configured
+     * language takes precedence over a configured datatype.
+     *
+     * @param resourceUri URI of the resource being processed (not used by this mapper)
+     * @param elem the selected element whose attribute is read
+     * @param factory factory used to create the literal
+     * @return a one-element list with the literal, or an empty list if the value is blank
+     */
+    @Override
+    public List<Value> map(String resourceUri, Element elem, ValueFactory factory) {
+        final String value = cleanValue(elem.attr(attr));
+        if (StringUtils.isBlank(value)) {
+            return Collections.emptyList();
+        }
+
+        final Value literal;
+        if (language != null) {
+            literal = factory.createLiteral(value, languageTag(language));
+        } else if (datatype != null) {
+            literal = factory.createLiteral(value, factory.createURI(Namespaces.NS_XSD + datatype));
+        } else {
+            literal = factory.createLiteral(value);
+        }
+        return Collections.singletonList(literal);
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssSelectorMapper.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssSelectorMapper.java b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssSelectorMapper.java
new file mode 100644
index 0000000..1c1ce5e
--- /dev/null
+++ b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssSelectorMapper.java
@@ -0,0 +1,48 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.provider.html.mapping;
+
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+
+/**
+ * Base class for mappers that pick the elements to map by means of a {@link Selector}.
+ * <p>
+ * The selector is either supplied directly or derived from a CSS query string; subclasses only
+ * have to implement the mapping of a selected element to RDF values.
+ */
+public abstract class CssSelectorMapper implements JSoupMapper {
+
+    /** Strategy used to pick the elements to map. */
+    protected final Selector selector;
+
+    /**
+     * Creates a mapper that selects elements with the given CSS query.
+     *
+     * @param cssSelector CSS query evaluated against the node passed to {@link #select(Element)}
+     */
+    public CssSelectorMapper(final String cssSelector) {
+        this(new CssQuerySelector(cssSelector));
+    }
+
+    /**
+     * Creates a mapper that selects elements with the given strategy.
+     *
+     * @param selector strategy used to pick the elements to map
+     */
+    public CssSelectorMapper(Selector selector) {
+        this.selector = selector;
+    }
+
+    /**
+     * Selects the elements to map by delegating to the configured selector.
+     *
+     * @param htmlDoc the node to search in
+     * @return the elements returned by the selector
+     */
+    @Override
+    public Elements select(Element htmlDoc) {
+        return selector.select(htmlDoc);
+    }
+
+    /** Strategy for picking elements below a given node. */
+    public interface Selector {
+
+        /**
+         * Picks elements starting from the given node.
+         *
+         * @param node the node to search in
+         * @return the selected elements
+         */
+        Elements select(Element node);
+    }
+
+    /** Selector that evaluates a fixed CSS query against the node it is given. */
+    private static final class CssQuerySelector implements Selector {
+
+        private final String cssQuery;
+
+        CssQuerySelector(String cssQuery) {
+            this.cssQuery = cssQuery;
+        }
+
+        @Override
+        public Elements select(Element node) {
+            return node.select(cssQuery);
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssTextLiteralMapper.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssTextLiteralMapper.java b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssTextLiteralMapper.java
new file mode 100644
index 0000000..fa52074
--- /dev/null
+++ b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssTextLiteralMapper.java
@@ -0,0 +1,86 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.provider.html.mapping;
+
+import at.newmedialab.sesame.commons.model.Namespaces;
+import org.apache.commons.lang.StringUtils;
+import org.jsoup.nodes.Element;
+import org.openrdf.model.Value;
+import org.openrdf.model.ValueFactory;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+
+/**
+ * Maps the text content of the selected elements to an RDF literal.
+ * <p>
+ * Depending on how the mapper was constructed the literal is language-tagged, typed with an XSD
+ * datatype, or plain. Elements with blank text produce no literal.
+ */
+public class CssTextLiteralMapper extends CssSelectorMapper {
+
+    /** Local name of the XSD datatype, or {@code null} for untyped literals. */
+    protected final String datatype;
+    /** Language of the literal, or {@code null} for literals without language tag. */
+    protected final Locale language;
+
+    private CssTextLiteralMapper(String cssSelector, Locale lang, String datatype) {
+        super(cssSelector);
+        language = lang;
+        this.datatype = datatype;
+    }
+
+    private CssTextLiteralMapper(Selector selector, Locale lang, String datatype) {
+        super(selector);
+        language = lang;
+        this.datatype = datatype;
+    }
+
+    /**
+     * Creates a mapper emitting plain literals.
+     *
+     * @param cssSelector CSS query selecting the elements to map
+     */
+    public CssTextLiteralMapper(String cssSelector) {
+        this(cssSelector, null, null);
+    }
+
+    /**
+     * Creates a mapper emitting language-tagged literals.
+     *
+     * @param cssSelector CSS query selecting the elements to map
+     * @param lang language of the literals
+     */
+    public CssTextLiteralMapper(String cssSelector, Locale lang) {
+        this(cssSelector, lang, null);
+    }
+
+    /**
+     * Creates a mapper emitting typed literals.
+     *
+     * @param cssSelector CSS query selecting the elements to map
+     * @param datatype local name of the XSD datatype, appended to the XSD namespace
+     */
+    public CssTextLiteralMapper(String cssSelector, String datatype) {
+        this(cssSelector, null, datatype);
+    }
+
+    /**
+     * Creates a mapper emitting plain literals.
+     *
+     * @param selector strategy selecting the elements to map
+     */
+    public CssTextLiteralMapper(Selector selector) {
+        this(selector, null, null);
+    }
+
+    /**
+     * Creates a mapper emitting language-tagged literals.
+     *
+     * @param selector strategy selecting the elements to map
+     * @param lang language of the literals
+     */
+    public CssTextLiteralMapper(Selector selector, Locale lang) {
+        this(selector, lang, null);
+    }
+
+    /**
+     * Creates a mapper emitting typed literals.
+     *
+     * @param selector strategy selecting the elements to map
+     * @param datatype local name of the XSD datatype, appended to the XSD namespace
+     */
+    public CssTextLiteralMapper(Selector selector, String datatype) {
+        this(selector, null, datatype);
+    }
+
+    /**
+     * Normalises the raw element text before it is turned into a literal; subclasses may
+     * override this to apply additional clean-up.
+     *
+     * @param value the raw element text
+     * @return the text with leading and trailing whitespace removed
+     */
+    protected String cleanValue(String value) {
+        return value.trim();
+    }
+
+    /**
+     * Builds a hyphen-separated language tag (e.g. {@code en-US}) from the given locale.
+     * <p>
+     * {@link Locale#toString()} separates its parts with underscores ({@code en_US}), which is
+     * not a well-formed language tag and cannot be written as {@code @en_US} in Turtle or
+     * SPARQL. The tag is assembled by hand to avoid depending on
+     * {@code Locale.toLanguageTag()}, which only exists from Java 7 on.
+     */
+    private static String languageTag(Locale locale) {
+        final StringBuilder tag = new StringBuilder();
+        final String[] parts = { locale.getLanguage(), locale.getCountry(), locale.getVariant() };
+        for (String part : parts) {
+            if (part.length() > 0) {
+                if (tag.length() > 0) {
+                    tag.append('-');
+                }
+                tag.append(part.replace('_', '-'));
+            }
+        }
+        return tag.toString();
+    }
+
+    /**
+     * Converts the text of {@code elem} into a single literal. A configured language takes
+     * precedence over a configured datatype.
+     *
+     * @param resourceUri URI of the resource being processed (not used by this mapper)
+     * @param elem the selected element whose text is read
+     * @param factory factory used to create the literal
+     * @return a one-element list with the literal, or an empty list if the text is blank
+     */
+    @Override
+    public List<Value> map(String resourceUri, Element elem, ValueFactory factory) {
+        final String value = cleanValue(elem.text());
+        if (StringUtils.isBlank(value)) {
+            return Collections.emptyList();
+        }
+
+        final Value literal;
+        if (language != null) {
+            literal = factory.createLiteral(value, languageTag(language));
+        } else if (datatype != null) {
+            literal = factory.createLiteral(value, factory.createURI(Namespaces.NS_XSD + datatype));
+        } else {
+            literal = factory.createLiteral(value);
+        }
+        return Collections.singletonList(literal);
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/c32963d5/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssUriAttrBlacklistQueryParamsMapper.java
----------------------------------------------------------------------
diff --git a/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssUriAttrBlacklistQueryParamsMapper.java b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssUriAttrBlacklistQueryParamsMapper.java
new file mode 100644
index 0000000..a79ae4a
--- /dev/null
+++ b/ldclient/ldclient-provider-html/src/main/java/org/apache/marmotta/ldclient/provider/html/mapping/CssUriAttrBlacklistQueryParamsMapper.java
@@ -0,0 +1,58 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.ldclient.provider.html.mapping;
+
+import org.apache.http.NameValuePair;
+import org.apache.http.client.utils.URIBuilder;
+import org.apache.http.client.utils.URLEncodedUtils;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Variant of {@link CssUriAttrMapper} that removes a configurable set of query parameters from
+ * the URL before passing it on to {@link CssUriAttrMapper#rewriteUrl(String)}.
+ */
+public class CssUriAttrBlacklistQueryParamsMapper extends CssUriAttrMapper {
+
+    /** Names of the query parameters that are dropped from rewritten URLs. */
+    protected List<String> queryParams;
+
+    /**
+     * Creates a mapper for the given CSS selector.
+     *
+     * @param cssSelector CSS query selecting the elements to map
+     * @param attr name of the attribute holding the URL
+     * @param queryParamBlacklist names of the query parameters to remove from the URL
+     */
+    public CssUriAttrBlacklistQueryParamsMapper(String cssSelector, String attr, String... queryParamBlacklist) {
+        super(cssSelector, attr);
+        queryParams = Arrays.asList(queryParamBlacklist);
+    }
+
+    /**
+     * Creates a mapper for the given selector.
+     *
+     * @param selector strategy selecting the elements to map
+     * @param attr name of the attribute holding the URL
+     * @param queryParamBlacklist names of the query parameters to remove from the URL
+     */
+    public CssUriAttrBlacklistQueryParamsMapper(Selector selector, String attr, String... queryParamBlacklist) {
+        super(selector, attr);
+        queryParams = Arrays.asList(queryParamBlacklist);
+    }
+
+    /**
+     * Strips the blacklisted query parameters from {@code url} and hands the result to the
+     * superclass. All other parameters are kept, including every occurrence of parameters that
+     * appear more than once. A URL that cannot be parsed as a URI is passed on unmodified.
+     *
+     * @param url the URL found in the document
+     * @return the result of the superclass rewrite of the filtered (or unmodified) URL
+     */
+    @Override
+    protected String rewriteUrl(String url) {
+        try {
+            URI u = new URI(url);
+            URIBuilder builder = new URIBuilder(u).removeQuery();
+            for (NameValuePair p : URLEncodedUtils.parse(u, "UTF-8")) {
+                if (!queryParams.contains(p.getName())) {
+                    // addParameter rather than setParameter: setParameter overwrites an earlier
+                    // value of the same name, which would collapse "?tag=a&tag=b" to "?tag=b"
+                    builder.addParameter(p.getName(), p.getValue());
+                }
+            }
+            final String string = builder.build().toString();
+            return super.rewriteUrl(string);
+        } catch (URISyntaxException e) {
+            // not a valid URI: leave the filtering out and let the superclass deal with it
+            return super.rewriteUrl(url);
+        }
+    }
+
+}