You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@marmotta.apache.org by wi...@apache.org on 2014/06/13 10:58:46 UTC
[086/100] [abbrv] git commit: moved to commons the quick (and dirty)
trick to encode latin1 string from freebase
moved to commons the quick (and dirty) trick to encode latin1 string from freebase
Project: http://git-wip-us.apache.org/repos/asf/marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/marmotta/commit/b00e935d
Tree: http://git-wip-us.apache.org/repos/asf/marmotta/tree/b00e935d
Diff: http://git-wip-us.apache.org/repos/asf/marmotta/diff/b00e935d
Branch: refs/heads/ldp
Commit: b00e935d0fce5944367ed6f41f8ae108e008edf3
Parents: c5f0d76
Author: Sergio Fernández <wi...@apache.org>
Authored: Thu Apr 24 20:16:07 2014 +0200
Committer: Sergio Fernández <wi...@apache.org>
Committed: Thu Apr 24 20:16:07 2014 +0200
----------------------------------------------------------------------
.../marmotta/commons/util/StringUtils.java | 40 ++++++++++++++++++++
.../ldclient/ldclient-provider-freebase/pom.xml | 4 ++
.../provider/freebase/FreebaseProvider.java | 20 ++--------
3 files changed, 47 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/marmotta/blob/b00e935d/commons/marmotta-commons/src/main/java/org/apache/marmotta/commons/util/StringUtils.java
----------------------------------------------------------------------
diff --git a/commons/marmotta-commons/src/main/java/org/apache/marmotta/commons/util/StringUtils.java b/commons/marmotta-commons/src/main/java/org/apache/marmotta/commons/util/StringUtils.java
new file mode 100644
index 0000000..3e597c4
--- /dev/null
+++ b/commons/marmotta-commons/src/main/java/org/apache/marmotta/commons/util/StringUtils.java
@@ -0,0 +1,40 @@
+package org.apache.marmotta.commons.util;
+
+/**
+ * String utility functions.
+ *
+ * @author Sergio Fernández
+ */
+public class StringUtils {
+
+ private StringUtils() {
+
+ }
+
+ public static String fixLatin1(String str) {
+ //TODO: find a way to properly re-encode the literal
+ //http://www.ic.unicamp.br/~stolfi/EXPORT/www/ISO-8859-1-Encoding.html
+ str = str.replaceAll("\\\\xe1", "á");
+ str = str.replaceAll("\\\\xe2", "â");
+ str = str.replaceAll("\\\\xe3", "ã");
+ str = str.replaceAll("\\\\xe4", "ä");
+ str = str.replaceAll("\\\\xe7", "ç");
+ str = str.replaceAll("\\\\xe8", "è");
+ str = str.replaceAll("\\\\xe9", "é");
+ str = str.replaceAll("\\\\xea", "ê");
+ str = str.replaceAll("\\\\xeb", "ë");
+ str = str.replaceAll("\\\\xed", "í");
+ str = str.replaceAll("\\\\xee", "î");
+ str = str.replaceAll("\\\\xef", "ï");
+ str = str.replaceAll("\\\\xf3", "ó");
+ str = str.replaceAll("\\\\xf4", "ô");
+ str = str.replaceAll("\\\\xf6", "ö");
+ str = str.replaceAll("\\\\xf9", "ù");
+ str = str.replaceAll("\\\\xfb", "û");
+ str = str.replaceAll("\\\\xfc", "ü");
+ str = str.replaceAll("\\\\xfa", "ú");
+ str = str.replaceAll("\\\\x", ""); //FIXME: wrong! This only strips the remaining \x markers, leaving the hex digits of any unmapped escape in the text
+ return str;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/marmotta/blob/b00e935d/libraries/ldclient/ldclient-provider-freebase/pom.xml
----------------------------------------------------------------------
diff --git a/libraries/ldclient/ldclient-provider-freebase/pom.xml b/libraries/ldclient/ldclient-provider-freebase/pom.xml
index 39f9848..5cba92c 100644
--- a/libraries/ldclient/ldclient-provider-freebase/pom.xml
+++ b/libraries/ldclient/ldclient-provider-freebase/pom.xml
@@ -42,6 +42,10 @@
<groupId>org.openrdf.sesame</groupId>
<artifactId>sesame-rio-turtle</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.marmotta</groupId>
+ <artifactId>marmotta-commons</artifactId>
+ </dependency>
<dependency>
<groupId>junit</groupId>
http://git-wip-us.apache.org/repos/asf/marmotta/blob/b00e935d/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java
----------------------------------------------------------------------
diff --git a/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java b/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java
index ef3652b..fe39296 100644
--- a/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java
+++ b/libraries/ldclient/ldclient-provider-freebase/src/main/java/org/apache/marmotta/ldclient/provider/freebase/FreebaseProvider.java
@@ -183,7 +183,7 @@ public class FreebaseProvider extends AbstractHttpProvider {
} else if (o.contains("\\u")) {
o = StringEscapeUtils.unescapeJava(o);
} else if (o.contains("\\x")) {
- o = fixLatin1(o);
+ o = org.apache.marmotta.commons.util.StringUtils.fixLatin1(o);
}
sb.append(" " + p + " " + o + tripleMatcher.group(3));
sb.append("\n");
@@ -213,7 +213,7 @@ public class FreebaseProvider extends AbstractHttpProvider {
//wrong charset
if (literal.contains("\\x")) {
- literal = fixLatin1(literal);
+ literal = org.apache.marmotta.commons.util.StringUtils.fixLatin1(literal);
}
//wrong unicode encoding
@@ -224,18 +224,4 @@ public class FreebaseProvider extends AbstractHttpProvider {
return literal;
}
- private String fixLatin1(String str) {
- //TODO: find a way to re-code properly the literal
- //http://www.ic.unicamp.br/~stolfi/EXPORT/www/ISO-8859-1-Encoding.html
- str = str.replaceAll("\\\\xe1", "á");
- str = str.replaceAll("\\\\xe3", "ã");
- str = str.replaceAll("\\\\xe7", "ç");
- str = str.replaceAll("\\\\xe9", "é");
- str = str.replaceAll("\\\\xed", "í");
- str = str.replaceAll("\\\\xf3", "ó");
- str = str.replaceAll("\\\\xfa", "ú");
- str = str.replaceAll("\\\\x", ""); //FIXME: wrong, wrong, wrong!
- return str;
- }
-
-}
\ No newline at end of file
+}