You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@marmotta.apache.org by wi...@apache.org on 2014/01/17 15:17:57 UTC
git commit: MARMOTTA-423: preparing the infrastructure to plugin
rules to fix the wrong turtle
Updated Branches:
refs/heads/develop dd23600a0 -> 9e737af91
MARMOTTA-423: preparing the infrastructure to plugin rules to fix the wrong turtle
Project: http://git-wip-us.apache.org/repos/asf/marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/marmotta/commit/9e737af9
Tree: http://git-wip-us.apache.org/repos/asf/marmotta/tree/9e737af9
Diff: http://git-wip-us.apache.org/repos/asf/marmotta/diff/9e737af9
Branch: refs/heads/develop
Commit: 9e737af916fcaae92d406f127607b3628a768407
Parents: dd23600
Author: Sergio Fernández <wi...@apache.org>
Authored: Fri Jan 17 15:17:44 2014 +0100
Committer: Sergio Fernández <wi...@apache.org>
Committed: Fri Jan 17 15:17:44 2014 +0100
----------------------------------------------------------------------
.../ldclient/ldclient-provider-freebase/pom.xml | 3 +-
.../endpoint/freebase/FreebaseEndpoint.java | 5 +-
.../provider/freebase/FreebaseProvider.java | 53 +++++++++++++++++---
3 files changed, 49 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/marmotta/blob/9e737af9/libraries/ldclient/ldclient-provider-freebase/pom.xml
----------------------------------------------------------------------
diff --git a/libraries/ldclient/ldclient-provider-freebase/pom.xml b/libraries/ldclient/ldclient-provider-freebase/pom.xml
index 4b55598..4ca4dd1 100644
--- a/libraries/ldclient/ldclient-provider-freebase/pom.xml
+++ b/libraries/ldclient/ldclient-provider-freebase/pom.xml
@@ -57,9 +57,8 @@
<scope>test</scope>
</dependency>
<dependency>
- <groupId>commons-io</groupId>
+ <groupId>org.apache.commons</groupId>
<artifactId>commons-io</artifactId>
- <scope>test</scope>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
http://git-wip-us.apache.org/repos/asf/marmotta/blob/9e737af9/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/endpoint/freebase/FreebaseEndpoint.java
----------------------------------------------------------------------
diff --git a/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/endpoint/freebase/FreebaseEndpoint.java b/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/endpoint/freebase/FreebaseEndpoint.java
index 1964365..688ea5b 100644
--- a/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/endpoint/freebase/FreebaseEndpoint.java
+++ b/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/endpoint/freebase/FreebaseEndpoint.java
@@ -19,16 +19,17 @@ package org.apache.marmotta.ldclient.endpoint.freebase;
import org.apache.marmotta.commons.http.ContentType;
import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
+import org.apache.marmotta.ldclient.provider.freebase.FreebaseProvider;
/**
- * A particular endpoint for accessing RDF from Freebase.
+ * Endpoint for accessing RDF from Freebase.
*
* @author Sergio Fernández
*/
public class FreebaseEndpoint extends Endpoint {
public FreebaseEndpoint() {
- super("Freebase", "Freebase", "http(s?)://rdf\\.freebase\\.com/ns/.*", null, 86400L);
+ super(FreebaseProvider.NAME, FreebaseProvider.NAME, FreebaseProvider.PATTERN, null, 86400L);
setPriority(PRIORITY_MEDIUM);
addContentType(new ContentType("text", "turtle", 1.0));
addContentType(new ContentType("text", "plain", 0.2));
http://git-wip-us.apache.org/repos/asf/marmotta/blob/9e737af9/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java
----------------------------------------------------------------------
diff --git a/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java b/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java
index 38a03d8..699c294 100644
--- a/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java
+++ b/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java
@@ -20,8 +20,8 @@ package org.apache.marmotta.ldclient.provider.freebase;
import com.google.common.base.Preconditions;
import javolution.util.function.Predicate;
+import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
-import org.apache.marmotta.commons.http.ContentType;
import org.apache.marmotta.commons.sesame.model.ModelCommons;
import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
import org.apache.marmotta.ldclient.exception.DataRetrievalException;
@@ -30,12 +30,16 @@ import org.openrdf.model.Model;
import org.openrdf.model.Statement;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFParseException;
-import org.openrdf.rio.RDFParserRegistry;
+import org.openrdf.rio.Rio;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.StringWriter;
import java.util.Collections;
import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
/**
* Linked Data patched data provider to Freebase.
@@ -44,8 +48,12 @@ import java.util.List;
*/
public class FreebaseProvider extends AbstractHttpProvider {
- public static final String PROVIDER_NAME = "Freebase";
+ public static final String NAME = "Freebase";
+ public static final String PATTERN = "http(s?)://rdf\\.freebase\\.com/ns/.*";
public static final String API = "https://www.googleapis.com/freebase/v1/rdf/";
+ public static final RDFFormat DEFAULT_RDF_FORMAT = RDFFormat.TURTLE;
+ public static final String DEFAULT_ENCODING = "UTF-8";
+ private static final Pattern CHARSET_PATTERN = Pattern.compile("(?i)\\bcharset=\\s*\"?([^\\s;\"]*)");
/**
* Return the name of this data provider. To be used e.g. in the configuration and in log messages.
@@ -54,7 +62,7 @@ public class FreebaseProvider extends AbstractHttpProvider {
*/
@Override
public String getName() {
- return PROVIDER_NAME;
+ return NAME;
}
@Override
@@ -73,7 +81,7 @@ public class FreebaseProvider extends AbstractHttpProvider {
*/
@Override
public List<String> buildRequestUrl(String uri, Endpoint endpoint) {
- Preconditions.checkNotNull(uri);
+ Preconditions.checkState(StringUtils.isNotBlank(uri));
String id = uri.substring(uri.lastIndexOf('/') + 1);
String url = API + id.replace('.', '/');
return Collections.singletonList(url);
@@ -81,10 +89,24 @@ public class FreebaseProvider extends AbstractHttpProvider {
@Override
public List<String> parseResponse(final String resourceUri, final String requestUrl, Model triples, InputStream in, final String contentType) throws DataRetrievalException {
- Preconditions.checkState(contentType.contains("text/plain"), "Unexpected content type: " + contentType);
- RDFFormat format = RDFFormat.TURTLE;
+
+ RDFFormat format;
+ if (StringUtils.isNotBlank(contentType) && (contentType.contains("text/plain")||contentType.contains("text/turtle"))) {
+ format = DEFAULT_RDF_FORMAT;
+ } else {
+ format = Rio.getWriterFormatForMIMEType(contentType, DEFAULT_RDF_FORMAT);
+ }
+
+ String encoding = DEFAULT_ENCODING;
+ Matcher m = CHARSET_PATTERN.matcher(contentType);
+ if (StringUtils.isNotBlank(contentType) && m.find()) {
+ encoding = m.group(1).trim().toUpperCase();
+ } else {
+ encoding = DEFAULT_ENCODING;
+ }
+
try {
- ModelCommons.add(triples, in, resourceUri, format, new Predicate<Statement>() {
+ ModelCommons.add(triples, fix(in, encoding), resourceUri, format, new Predicate<Statement>() {
@Override
public boolean test(Statement param) {
return StringUtils.equals(param.getSubject().stringValue(), resourceUri);
@@ -96,6 +118,21 @@ public class FreebaseProvider extends AbstractHttpProvider {
} catch (IOException e) {
throw new DataRetrievalException("I/O error while trying to read remote Turtle from Freebase", e);
}
+
+ }
+
+ /**
+ * Fixes Freebase deficiencies on Turtle serialization
+ *
+ * @param in stream with the raw data
+ * @return fixed stream
+ */
+ private InputStream fix(InputStream in, String encoding) throws IOException {
+ StringWriter writer = new StringWriter();
+ IOUtils.copy(in, writer, encoding);
+ String raw = writer.toString();
+ //TODO: perform fixes
+ return new ByteArrayInputStream(raw.getBytes(encoding));
}
}