You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@marmotta.apache.org by wi...@apache.org on 2014/01/20 10:19:47 UTC

git commit: MARMOTTA-423: doing a manual pre-parsing, looking only for literals as objects

Updated Branches:
  refs/heads/develop 6b4fc9dd4 -> 387e2f330


MARMOTTA-423: doing a manual pre-parsing, looking only for literals as objects


Project: http://git-wip-us.apache.org/repos/asf/marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/marmotta/commit/387e2f33
Tree: http://git-wip-us.apache.org/repos/asf/marmotta/tree/387e2f33
Diff: http://git-wip-us.apache.org/repos/asf/marmotta/diff/387e2f33

Branch: refs/heads/develop
Commit: 387e2f330a08e1e2917d989b1718265341f056ef
Parents: 6b4fc9d
Author: Sergio Fernández <wi...@apache.org>
Authored: Mon Jan 20 10:18:52 2014 +0100
Committer: Sergio Fernández <wi...@apache.org>
Committed: Mon Jan 20 10:18:52 2014 +0100

----------------------------------------------------------------------
 .../provider/freebase/FreebaseProvider.java     | 34 ++++++++++++++++----
 1 file changed, 28 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/marmotta/blob/387e2f33/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java
----------------------------------------------------------------------
diff --git a/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java b/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java
index 8b7faff..ee82d39 100644
--- a/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java
+++ b/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java
@@ -33,6 +33,8 @@ import org.openrdf.rio.RDFParseException;
 import org.openrdf.rio.Rio;
 
 import com.github.vigsterkr.freebase.fix.FreebaseFixit;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.*;
 import java.util.Collections;
@@ -47,12 +49,15 @@ import java.util.regex.Pattern;
  */
 public class FreebaseProvider extends AbstractHttpProvider {
 
+    private static Logger log = LoggerFactory.getLogger(FreebaseProvider.class);
+
     public static final String NAME = "Freebase";
     public static final String PATTERN = "http(s?)://rdf\\.freebase\\.com/ns/.*";
     public static final String API = "https://www.googleapis.com/freebase/v1/rdf/";
     public static final RDFFormat DEFAULT_RDF_FORMAT = RDFFormat.TURTLE;
     public static final String DEFAULT_ENCODING = "UTF-8";
     private static final Pattern CHARSET_PATTERN = Pattern.compile("(?i)\\bcharset=\\s*\"?([^\\s;\"]*)");
+    private static final  Pattern FREEBASE_LITERAL_PATTERN = Pattern.compile("^\\s+[a-z]+:[a-z]+(?:\\.[a-z]+)*\\s+\"(.*)\"(?:@[a-z]+)?;$");
 
     /**
      * Return the name of this data provider. To be used e.g. in the configuration and in log messages.
@@ -130,12 +135,29 @@ public class FreebaseProvider extends AbstractHttpProvider {
      */
     private InputStream fix(InputStream is, String encoding) throws IOException {
         BufferedReader br = new BufferedReader(new InputStreamReader(is));
-        PipedOutputStream po = new PipedOutputStream();
-        PrintStream ps = new PrintStream(po);
-        FreebaseFixit.fix(br, ps);
-        ps.flush();
-        ps.close();
-        return new PipedInputStream(po);
+        StringBuilder sb = new StringBuilder();
+        String line;
+        while ((line = br.readLine()) != null) {
+            Matcher m = FREEBASE_LITERAL_PATTERN.matcher(line);
+            if (m.matches()) {
+                // the pattern's only capturing group is the text between the quotes; splice the fix back in place
+                try {
+                    String literal = m.group(1);
+                    log.info("Original literal: {}", literal);
+                    String fixed = FreebaseFixit.fixObject(literal);
+                    log.info("Fixed literal: {}", fixed);
+                    sb.append(line, 0, m.start(1)).append(fixed).append(line, m.end(1), line.length());
+                } catch (Exception e) {
+                    log.error("Error fixing line, so triple ignored: {}", e.getMessage());
+                    log.debug("error on line: {}", line);
+                }
+            } else {
+                //not a triple with a literal as object, so pass-through
+                sb.append(line);
+            }
+            sb.append("\n");
+        }
+        return new ByteArrayInputStream(sb.toString().getBytes());
     }
 
 }