You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@marmotta.apache.org by ss...@apache.org on 2013/11/05 17:15:56 UTC
git commit: bug fixes in KiWi SPARQL: better regexp support
Updated Branches:
refs/heads/develop 8bf44fd17 -> fa78fa55f
bug fixes in KiWi SPARQL: better regexp support
Project: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/commit/fa78fa55
Tree: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/tree/fa78fa55
Diff: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/diff/fa78fa55
Branch: refs/heads/develop
Commit: fa78fa55fef8964de784f32255d12bef6e2e764c
Parents: 8bf44fd
Author: Sebastian Schaffert <ss...@apache.org>
Authored: Tue Nov 5 17:15:50 2013 +0100
Committer: Sebastian Schaffert <ss...@apache.org>
Committed: Tue Nov 5 17:15:50 2013 +0100
----------------------------------------------------------------------
.../marmotta/kiwi/loader/KiWiHandler.java | 85 ++++++++++++++++++++
.../evaluation/KiWiEvaluationStrategyImpl.java | 8 +-
.../persistence/KiWiSparqlConnection.java | 51 ++++++++++--
.../marmotta/kiwi/persistence/KiWiDialect.java | 17 +++-
.../marmotta/kiwi/persistence/h2/H2Dialect.java | 30 ++++++-
.../kiwi/persistence/mysql/MySQLDialect.java | 31 ++++++-
.../persistence/pgsql/PostgreSQLDialect.java | 34 +++++++-
7 files changed, 237 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/fa78fa55/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
new file mode 100644
index 0000000..3a884d6
--- /dev/null
+++ b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
@@ -0,0 +1,85 @@
+package org.apache.marmotta.kiwi.loader;
+
+import org.apache.marmotta.kiwi.persistence.KiWiConnection;
+import org.openrdf.model.Statement;
+import org.openrdf.rio.RDFHandler;
+import org.openrdf.rio.RDFHandlerException;
+
+/**
+ * A fast-lane RDF import handler that allows bulk-importing triples into a KiWi triplestore. It directly accesses
+ * the database using a KiWiConnection.
+ * <p>
+ * NOTE: in this revision the class is only a skeleton. Every RDFHandler callback below has an empty body and the
+ * connection field is never assigned, so feeding RDF data to this handler does not import anything yet.
+ *
+ * @author Sebastian Schaffert (sschaffert@apache.org)
+ */
+public class KiWiHandler implements RDFHandler {
+
+
+ // Connection meant for the direct database access described above; not assigned or read anywhere in this class yet.
+ private KiWiConnection connection;
+
+
+
+ /**
+ * Signals the end of the RDF data. This method is called when all data has
+ * been reported.
+ *
+ * @throws org.openrdf.rio.RDFHandlerException
+ * If the RDF handler has encountered an unrecoverable error.
+ */
+ @Override
+ public void endRDF() throws RDFHandlerException {
+ // Placeholder: no work is performed at the end of the data yet.
+ }
+
+ /**
+ * Signals the start of the RDF data. This method is called before any data
+ * is reported.
+ *
+ * @throws org.openrdf.rio.RDFHandlerException
+ * If the RDF handler has encountered an unrecoverable error.
+ */
+ @Override
+ public void startRDF() throws RDFHandlerException {
+ // Placeholder: no work is performed at the start of the data yet.
+ }
+
+ /**
+ * Handles a namespace declaration/definition. A namespace declaration
+ * associates a (short) prefix string with the namespace's URI. The prefix
+ * for default namespaces, which do not have an associated prefix, are
+ * represented as empty strings.
+ *
+ * @param prefix The prefix for the namespace, or an empty string in case of a
+ * default namespace.
+ * @param uri The URI that the prefix maps to.
+ * @throws org.openrdf.rio.RDFHandlerException
+ * If the RDF handler has encountered an unrecoverable error.
+ */
+ @Override
+ public void handleNamespace(String prefix, String uri) throws RDFHandlerException {
+ // Placeholder: the namespace declaration is currently ignored.
+ }
+
+ /**
+ * Handles a statement.
+ *
+ * @param st The statement.
+ * @throws org.openrdf.rio.RDFHandlerException
+ * If the RDF handler has encountered an unrecoverable error.
+ */
+ @Override
+ public void handleStatement(Statement st) throws RDFHandlerException {
+ // Placeholder: the statement is currently ignored; nothing is written through the connection.
+ }
+
+ /**
+ * Handles a comment.
+ *
+ * @param comment The comment.
+ * @throws org.openrdf.rio.RDFHandlerException
+ * If the RDF handler has encountered an unrecoverable error.
+ */
+ @Override
+ public void handleComment(String comment) throws RDFHandlerException {
+ // Placeholder: the comment is currently ignored.
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/fa78fa55/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/evaluation/KiWiEvaluationStrategyImpl.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/evaluation/KiWiEvaluationStrategyImpl.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/evaluation/KiWiEvaluationStrategyImpl.java
index b5b854d..9c79fe3 100644
--- a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/evaluation/KiWiEvaluationStrategyImpl.java
+++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/evaluation/KiWiEvaluationStrategyImpl.java
@@ -123,7 +123,7 @@ public class KiWiEvaluationStrategyImpl extends EvaluationStrategyImpl{
* @param expr
* @return
*/
- private static boolean isSupported(TupleExpr expr) {
+ private boolean isSupported(TupleExpr expr) {
if(expr instanceof Join) {
return isSupported(((Join) expr).getLeftArg()) && isSupported(((Join) expr).getRightArg());
} else if(expr instanceof Filter) {
@@ -142,7 +142,7 @@ public class KiWiEvaluationStrategyImpl extends EvaluationStrategyImpl{
* @param expr
* @return
*/
- private static boolean isSupported(ValueExpr expr) {
+ private boolean isSupported(ValueExpr expr) {
if(expr instanceof Compare) {
return isSupported(((Compare) expr).getLeftArg()) && isSupported(((Compare) expr).getRightArg());
} else if(expr instanceof MathExpr) {
@@ -174,7 +174,9 @@ public class KiWiEvaluationStrategyImpl extends EvaluationStrategyImpl{
} else if(expr instanceof LangMatches) {
return isSupported(((LangMatches) expr).getLeftArg()) && isConstant(((LangMatches) expr).getRightArg());
} else if(expr instanceof Regex) {
- return isSupported(((Regex) expr).getArg()) && isAtomic(((Regex) expr).getPatternArg()) && ((Regex) expr).getFlagsArg() == null;
+ ValueExpr flags = ((Regex) expr).getFlagsArg();
+ String _flags = flags != null && flags instanceof ValueConstant ? ((ValueConstant)flags).getValue().stringValue() : null;
+ return isSupported(((Regex) expr).getArg()) && isAtomic(((Regex) expr).getPatternArg()) && connection.getDialect().isRegexpSupported(_flags);
} else {
return false;
}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/fa78fa55/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java
index d65b059..8e0a303 100644
--- a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java
+++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java
@@ -27,10 +27,15 @@ import org.apache.marmotta.commons.sesame.model.Namespaces;
import org.apache.marmotta.commons.util.DateUtils;
import org.apache.marmotta.kiwi.model.rdf.KiWiNode;
import org.apache.marmotta.kiwi.persistence.KiWiConnection;
+import org.apache.marmotta.kiwi.persistence.KiWiDialect;
import org.apache.marmotta.kiwi.persistence.util.ResultSetIteration;
import org.apache.marmotta.kiwi.persistence.util.ResultTransformerFunction;
import org.apache.marmotta.kiwi.sail.KiWiValueFactory;
-import org.openrdf.model.*;
+import org.openrdf.model.BNode;
+import org.openrdf.model.Literal;
+import org.openrdf.model.Resource;
+import org.openrdf.model.URI;
+import org.openrdf.model.Value;
import org.openrdf.model.vocabulary.SESAME;
import org.openrdf.query.Binding;
import org.openrdf.query.BindingSet;
@@ -45,8 +50,20 @@ import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
-import java.util.*;
-import java.util.concurrent.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CancellationException;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
import java.util.regex.Pattern;
/**
@@ -503,7 +520,7 @@ public class KiWiSparqlConnection {
} else if(expr instanceof Regex) {
Regex re = (Regex)expr;
- return optimizeRegexp(evaluateExpression(re.getArg(),queryVariables, optype), evaluateExpression(re.getPatternArg(), queryVariables, OPTypes.STRING));
+ return optimizeRegexp(evaluateExpression(re.getArg(),queryVariables, optype), evaluateExpression(re.getPatternArg(), queryVariables, OPTypes.STRING), re.getFlagsArg());
} else if(expr instanceof LangMatches) {
LangMatches lm = (LangMatches)expr;
String value = evaluateExpression(lm.getLeftArg(), queryVariables, optype);
@@ -740,7 +757,9 @@ public class KiWiSparqlConnection {
* @param pattern
* @return
*/
- private String optimizeRegexp(String value, String pattern) {
+ private String optimizeRegexp(String value, String pattern, ValueExpr flags) {
+ String _flags = flags != null && flags instanceof ValueConstant ? ((ValueConstant)flags).getValue().stringValue() : null;
+
String simplified = pattern;
// apply simplifications
@@ -769,7 +788,7 @@ public class KiWiSparqlConnection {
if(!simplified.endsWith("$")) {
simplified = simplified + "%";
} else {
- simplified = simplified.substring(0,simplified.length()-2);
+ simplified = simplified.substring(0,simplified.length()-1);
}
// replace all non-escaped occurrences of .* with %
@@ -782,9 +801,21 @@ public class KiWiSparqlConnection {
Pattern notSimplifiable = Pattern.compile("(?<!\\\\)[\\.\\*\\+\\{\\}\\[\\]\\|]");
if(notSimplifiable.matcher(simplified).find()) {
- return parent.getDialect().getRegexp(value, pattern);
+ return parent.getDialect().getRegexp(value, pattern, _flags);
} else {
- return value + " LIKE '"+simplified+"'";
+ if(!simplified.startsWith("%") && !simplified.endsWith("%")) {
+ if(StringUtils.containsIgnoreCase(_flags,"i")) {
+ return String.format("lower(%s) = lower('%s')", value, simplified);
+ } else {
+ return String.format("%s = '%s'", value, simplified);
+ }
+ } else {
+ if(StringUtils.containsIgnoreCase(_flags,"i")) {
+ return parent.getDialect().getILike(value, "'" + simplified + "'");
+ } else {
+ return value + " LIKE '"+simplified+"'";
+ }
+ }
}
}
@@ -805,4 +836,8 @@ public class KiWiSparqlConnection {
private static enum OPTypes {
STRING, DOUBLE, INT, DATE, ANY
}
+
+ /**
+ * Return the database dialect used by this SPARQL connection. The call is delegated to
+ * {@code parent.getDialect()}; parent is presumably the wrapped KiWiConnection (its declaration is not
+ * visible here, confirm against the field definition).
+ *
+ * @return the dialect reported by {@code parent}
+ */
+ public KiWiDialect getDialect() {
+ return parent.getDialect();
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/fa78fa55/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/KiWiDialect.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/KiWiDialect.java b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/KiWiDialect.java
index cbf6fbf..2ab4051 100644
--- a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/KiWiDialect.java
+++ b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/KiWiDialect.java
@@ -23,7 +23,10 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.URL;
-import java.util.*;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.Properties;
+import java.util.Set;
/**
* A dialect provides the SQL statements necessary to access the different types of database systems. Each
@@ -193,12 +196,22 @@ public abstract class KiWiDialect {
/**
* Return the database specific operator for matching a text against a regular expression.
*
+ *
* @param text
* @param pattern
+ * @param flags
* @return
*/
- public abstract String getRegexp(String text, String pattern);
+ public abstract String getRegexp(String text, String pattern, String flags);
+
+ /**
+ * Return true in case the SPARQL RE flags contained in the given string are supported.
+ *
+ * @param flags the SPARQL regular expression flags as a string of flag letters (e.g. "i"); presumably
+ * null when the query gives no constant flags argument (confirm against the callers)
+ * @return true if all flags contained in the string are supported by this dialect, false otherwise
+ */
+ public abstract boolean isRegexpSupported(String flags);
/**
* Return the database specific case insensitive like comparison, e.g. ILIKE in Postgres.
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/fa78fa55/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/h2/H2Dialect.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/h2/H2Dialect.java b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/h2/H2Dialect.java
index 8bbb1a6..98cc88e 100644
--- a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/h2/H2Dialect.java
+++ b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/h2/H2Dialect.java
@@ -17,6 +17,7 @@
*/
package org.apache.marmotta.kiwi.persistence.h2;
+import org.apache.commons.lang3.StringUtils;
import org.apache.marmotta.kiwi.exception.DriverNotFoundException;
import org.apache.marmotta.kiwi.persistence.KiWiDialect;
@@ -51,8 +52,33 @@ public class H2Dialect extends KiWiDialect {
}
@Override
- public String getRegexp(String text, String pattern) {
- return text + " REGEXP " + pattern;
+ public String getRegexp(String text, String pattern, String flags) {
+     // Builds the H2 condition matching the SQL expression "text" against the regular expression given by
+     // the SQL expression "pattern". Both arguments are spliced into the result verbatim, so they are
+     // presumably complete SQL expressions (e.g. a column reference and a quoted literal); confirm against
+     // the callers. "flags" holds the SPARQL regex flags and may be null (treated as "no flags").
+     if(StringUtils.containsIgnoreCase(flags, "i")) {
+         // Enable case-insensitive matching inside the regular expression instead of wrapping the pattern in
+         // lower(): lowercasing the pattern text turns escapes such as \S, \W, \D and \B into their opposites
+         // (\s, \w, \d, \b) and so changes what the expression matches. H2 documents REGEXP in terms of
+         // Java's Matcher.find, so the embedded flags (?iu) request Unicode-aware case-insensitive matching.
+         // The parentheses keep the concatenation together as the right-hand operand of REGEXP.
+         return String.format("%s REGEXP ('(?iu)' || %s)", text, pattern);
+     } else {
+         return text + " REGEXP " + pattern;
+     }
+ }
+
+ /**
+ * Tell whether a regular expression using the given SPARQL flags can be handled by this dialect. A flag
+ * string containing "s", "m" or "x" (in any letter case) is rejected; every other flag string is accepted.
+ *
+ * @param flags the SPARQL regular expression flags to check
+ * @return false if flags contains one of the letters s, m or x, true otherwise
+ */
+ @Override
+ public boolean isRegexpSupported(String flags) {
+     // the flag letters this dialect cannot translate
+     for(String unsupported : new String[] {"s", "m", "x"}) {
+         if(StringUtils.containsIgnoreCase(flags, unsupported)) {
+             return false;
+         }
+     }
+     return true;
 }
@Override
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/fa78fa55/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/mysql/MySQLDialect.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/mysql/MySQLDialect.java b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/mysql/MySQLDialect.java
index 7914363..6446891 100644
--- a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/mysql/MySQLDialect.java
+++ b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/mysql/MySQLDialect.java
@@ -17,6 +17,7 @@
*/
package org.apache.marmotta.kiwi.persistence.mysql;
+import org.apache.commons.lang3.StringUtils;
import org.apache.marmotta.kiwi.exception.DriverNotFoundException;
import org.apache.marmotta.kiwi.persistence.KiWiDialect;
@@ -62,10 +63,36 @@ public class MySQLDialect extends KiWiDialect {
}
@Override
- public String getRegexp(String text, String pattern) {
- return text + " RLIKE " + pattern;
+ public String getRegexp(String text, String pattern, String flags) {
+ if(StringUtils.containsIgnoreCase(flags,"i")) {
+ return String.format("lower(%s) RLIKE lower(%s)",text,pattern);
+ } else {
+ return text + " RLIKE " + pattern;
+ }
+ }
+
+ /**
+ * Return true in case the SPARQL RE flags contained in the given string are supported.
+ *
+ * @param flags
+ * @return
+ */
+ @Override
+ public boolean isRegexpSupported(String flags) {
+ if(StringUtils.containsIgnoreCase(flags,"s")) {
+ return false;
+ }
+ if(StringUtils.containsIgnoreCase(flags,"m")) {
+ return false;
+ }
+ if(StringUtils.containsIgnoreCase(flags,"x")) {
+ return false;
+ }
+
+ return true;
}
+
@Override
public String getILike(String text, String pattern) {
return "lower("+text+") LIKE lower("+pattern+")";
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/fa78fa55/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/pgsql/PostgreSQLDialect.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/pgsql/PostgreSQLDialect.java b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/pgsql/PostgreSQLDialect.java
index 4110309..ac51678 100644
--- a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/pgsql/PostgreSQLDialect.java
+++ b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/pgsql/PostgreSQLDialect.java
@@ -17,6 +17,7 @@
*/
package org.apache.marmotta.kiwi.persistence.pgsql;
+import org.apache.commons.lang3.StringUtils;
import org.apache.marmotta.kiwi.exception.DriverNotFoundException;
import org.apache.marmotta.kiwi.persistence.KiWiDialect;
@@ -52,8 +53,37 @@ public class PostgreSQLDialect extends KiWiDialect {
}
@Override
- public String getRegexp(String text, String pattern) {
- return text + " ~ " + pattern;
+ public String getRegexp(String text, String pattern, String flags) {
+     // Builds the PostgreSQL condition matching the SQL expression "text" against the regular expression
+     // given by the SQL expression "pattern". Both arguments are spliced into the result verbatim, so they
+     // are presumably complete SQL expressions (e.g. a column reference and a quoted literal); confirm
+     // against the callers. "flags" holds the SPARQL regex flags and may be null (treated as "no flags").
+     StringBuilder flagList = new StringBuilder();
+     if(StringUtils.containsIgnoreCase(flags,"i")) {
+         flagList.append("i");
+     }
+     if(flagList.length() == 0) {
+         return text + " ~ " + pattern;
+     } else {
+         // The embedded-option prefix, e.g. (?i), has to be part of the regular expression value itself.
+         // Emitting it bare in front of the pattern expression, as before, produced invalid SQL. It is
+         // therefore passed as a string literal and concatenated with the pattern. The parentheses are
+         // required because ~ and || have the same precedence and would otherwise group left to right.
+         return String.format("%s ~ ('(?%s)' || %s)", text, flagList.toString(), pattern);
+     }
+ }
+
+ /**
+ * Tell whether a regular expression using the given SPARQL flags can be handled by this dialect. A flag
+ * string containing "s", "m" or "x" (in any letter case) is rejected; every other flag string is accepted.
+ *
+ * @param flags the SPARQL regular expression flags to check
+ * @return false if flags contains one of the letters s, m or x, true otherwise
+ */
+ @Override
+ public boolean isRegexpSupported(String flags) {
+     // single guard: any one of the unsupported flag letters disqualifies the expression
+     if(StringUtils.containsIgnoreCase(flags, "s")
+             || StringUtils.containsIgnoreCase(flags,"m")
+             || StringUtils.containsIgnoreCase(flags,"x")) {
+         return false;
+     }
+     return true;
 }
@Override