You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@marmotta.apache.org by ss...@apache.org on 2013/11/05 17:15:56 UTC

git commit: bug fixes in KiWi SPARQL: better regexp support

Updated Branches:
  refs/heads/develop 8bf44fd17 -> fa78fa55f


bug fixes in KiWi SPARQL: better regexp support


Project: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/commit/fa78fa55
Tree: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/tree/fa78fa55
Diff: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/diff/fa78fa55

Branch: refs/heads/develop
Commit: fa78fa55fef8964de784f32255d12bef6e2e764c
Parents: 8bf44fd
Author: Sebastian Schaffert <ss...@apache.org>
Authored: Tue Nov 5 17:15:50 2013 +0100
Committer: Sebastian Schaffert <ss...@apache.org>
Committed: Tue Nov 5 17:15:50 2013 +0100

----------------------------------------------------------------------
 .../marmotta/kiwi/loader/KiWiHandler.java       | 85 ++++++++++++++++++++
 .../evaluation/KiWiEvaluationStrategyImpl.java  |  8 +-
 .../persistence/KiWiSparqlConnection.java       | 51 ++++++++++--
 .../marmotta/kiwi/persistence/KiWiDialect.java  | 17 +++-
 .../marmotta/kiwi/persistence/h2/H2Dialect.java | 30 ++++++-
 .../kiwi/persistence/mysql/MySQLDialect.java    | 31 ++++++-
 .../persistence/pgsql/PostgreSQLDialect.java    | 34 +++++++-
 7 files changed, 237 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/fa78fa55/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
new file mode 100644
index 0000000..3a884d6
--- /dev/null
+++ b/libraries/kiwi/kiwi-loader/src/main/java/org/apache/marmotta/kiwi/loader/KiWiHandler.java
@@ -0,0 +1,85 @@
+package org.apache.marmotta.kiwi.loader;
+
+import org.apache.marmotta.kiwi.persistence.KiWiConnection;
+import org.openrdf.model.Statement;
+import org.openrdf.rio.RDFHandler;
+import org.openrdf.rio.RDFHandlerException;
+
+/**
+ * A fast-lane RDF import handler that allows bulk-importing triples into a KiWi triplestore. It is meant to directly
+ * access the database using a KiWiConnection; in this revision all handler callbacks are still empty stubs.
+ *
+ * @author Sebastian Schaffert (sschaffert@apache.org)
+ */
+public class KiWiHandler implements RDFHandler {
+
+    // NOTE(review): never assigned or read within this class yet; no constructor or setter is defined here
+    private KiWiConnection connection;
+
+
+
+    /**
+     * Signals the end of the RDF data. This method is called when all data has
+     * been reported.
+     *
+     * @throws org.openrdf.rio.RDFHandlerException
+     *          If the RDF handler has encountered an unrecoverable error.
+     */
+    @Override
+    public void endRDF() throws RDFHandlerException {
+        // TODO: not implemented yet, currently a no-op
+    }
+
+    /**
+     * Signals the start of the RDF data. This method is called before any data
+     * is reported.
+     *
+     * @throws org.openrdf.rio.RDFHandlerException
+     *          If the RDF handler has encountered an unrecoverable error.
+     */
+    @Override
+    public void startRDF() throws RDFHandlerException {
+        // TODO: not implemented yet, currently a no-op
+    }
+
+    /**
+     * Handles a namespace declaration/definition. A namespace declaration
+     * associates a (short) prefix string with the namespace's URI. The prefix
+     * of a default namespace, which does not have an associated prefix, is
+     * represented as an empty string.
+     *
+     * @param prefix The prefix for the namespace, or an empty string in case of a
+     *               default namespace.
+     * @param uri    The URI that the prefix maps to.
+     * @throws org.openrdf.rio.RDFHandlerException
+     *          If the RDF handler has encountered an unrecoverable error.
+     */
+    @Override
+    public void handleNamespace(String prefix, String uri) throws RDFHandlerException {
+        // TODO: not implemented yet, the namespace declaration is currently ignored
+    }
+
+    /**
+     * Handles a statement.
+     *
+     * @param st The statement.
+     * @throws org.openrdf.rio.RDFHandlerException
+     *          If the RDF handler has encountered an unrecoverable error.
+     */
+    @Override
+    public void handleStatement(Statement st) throws RDFHandlerException {
+        // TODO: not implemented yet, the statement is currently ignored and nothing is written to the database
+    }
+
+    /**
+     * Handles a comment.
+     *
+     * @param comment The comment.
+     * @throws org.openrdf.rio.RDFHandlerException
+     *          If the RDF handler has encountered an unrecoverable error.
+     */
+    @Override
+    public void handleComment(String comment) throws RDFHandlerException {
+        // TODO: not implemented yet, the comment is currently ignored
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/fa78fa55/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/evaluation/KiWiEvaluationStrategyImpl.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/evaluation/KiWiEvaluationStrategyImpl.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/evaluation/KiWiEvaluationStrategyImpl.java
index b5b854d..9c79fe3 100644
--- a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/evaluation/KiWiEvaluationStrategyImpl.java
+++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/evaluation/KiWiEvaluationStrategyImpl.java
@@ -123,7 +123,7 @@ public class KiWiEvaluationStrategyImpl extends EvaluationStrategyImpl{
      * @param expr
      * @return
      */
-    private static boolean isSupported(TupleExpr expr) {
+    private boolean isSupported(TupleExpr expr) {
         if(expr instanceof Join) {
             return isSupported(((Join) expr).getLeftArg()) && isSupported(((Join) expr).getRightArg());
         } else if(expr instanceof Filter) {
@@ -142,7 +142,7 @@ public class KiWiEvaluationStrategyImpl extends EvaluationStrategyImpl{
      * @param expr
      * @return
      */
-    private static boolean isSupported(ValueExpr expr) {
+    private boolean isSupported(ValueExpr expr) {
         if(expr instanceof Compare) {
             return isSupported(((Compare) expr).getLeftArg()) && isSupported(((Compare) expr).getRightArg());
         } else if(expr instanceof MathExpr) {
@@ -174,7 +174,9 @@ public class KiWiEvaluationStrategyImpl extends EvaluationStrategyImpl{
         } else if(expr instanceof LangMatches) {
             return isSupported(((LangMatches) expr).getLeftArg()) && isConstant(((LangMatches) expr).getRightArg());
         } else if(expr instanceof Regex) {
-            return isSupported(((Regex) expr).getArg()) && isAtomic(((Regex) expr).getPatternArg()) && ((Regex) expr).getFlagsArg() == null;
+            ValueExpr flags = ((Regex) expr).getFlagsArg();
+            String _flags = flags != null && flags instanceof ValueConstant ? ((ValueConstant)flags).getValue().stringValue() : null;
+            return isSupported(((Regex) expr).getArg()) && isAtomic(((Regex) expr).getPatternArg()) && connection.getDialect().isRegexpSupported(_flags);
         } else {
             return false;
         }

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/fa78fa55/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java
index d65b059..8e0a303 100644
--- a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java
+++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java
@@ -27,10 +27,15 @@ import org.apache.marmotta.commons.sesame.model.Namespaces;
 import org.apache.marmotta.commons.util.DateUtils;
 import org.apache.marmotta.kiwi.model.rdf.KiWiNode;
 import org.apache.marmotta.kiwi.persistence.KiWiConnection;
+import org.apache.marmotta.kiwi.persistence.KiWiDialect;
 import org.apache.marmotta.kiwi.persistence.util.ResultSetIteration;
 import org.apache.marmotta.kiwi.persistence.util.ResultTransformerFunction;
 import org.apache.marmotta.kiwi.sail.KiWiValueFactory;
-import org.openrdf.model.*;
+import org.openrdf.model.BNode;
+import org.openrdf.model.Literal;
+import org.openrdf.model.Resource;
+import org.openrdf.model.URI;
+import org.openrdf.model.Value;
 import org.openrdf.model.vocabulary.SESAME;
 import org.openrdf.query.Binding;
 import org.openrdf.query.BindingSet;
@@ -45,8 +50,20 @@ import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
-import java.util.*;
-import java.util.concurrent.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CancellationException;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 import java.util.regex.Pattern;
 
 /**
@@ -503,7 +520,7 @@ public class KiWiSparqlConnection {
         } else if(expr instanceof Regex) {
             Regex re = (Regex)expr;
 
-            return optimizeRegexp(evaluateExpression(re.getArg(),queryVariables, optype), evaluateExpression(re.getPatternArg(), queryVariables, OPTypes.STRING));
+            return optimizeRegexp(evaluateExpression(re.getArg(),queryVariables, optype), evaluateExpression(re.getPatternArg(), queryVariables, OPTypes.STRING), re.getFlagsArg());
         } else if(expr instanceof LangMatches) {
             LangMatches lm = (LangMatches)expr;
             String value = evaluateExpression(lm.getLeftArg(), queryVariables, optype);
@@ -740,7 +757,9 @@ public class KiWiSparqlConnection {
      * @param pattern
      * @return
      */
-    private String optimizeRegexp(String value, String pattern) {
+    private String optimizeRegexp(String value, String pattern, ValueExpr flags) {
+        String _flags = flags != null && flags instanceof ValueConstant ? ((ValueConstant)flags).getValue().stringValue() : null;
+
         String simplified = pattern;
 
         // apply simplifications
@@ -769,7 +788,7 @@ public class KiWiSparqlConnection {
         if(!simplified.endsWith("$")) {
             simplified = simplified + "%";
         } else {
-            simplified = simplified.substring(0,simplified.length()-2);
+            simplified = simplified.substring(0,simplified.length()-1);
         }
 
         // replace all non-escaped occurrences of .* with %
@@ -782,9 +801,21 @@ public class KiWiSparqlConnection {
         Pattern notSimplifiable = Pattern.compile("(?<!\\\\)[\\.\\*\\+\\{\\}\\[\\]\\|]");
 
         if(notSimplifiable.matcher(simplified).find()) {
-            return parent.getDialect().getRegexp(value, pattern);
+            return parent.getDialect().getRegexp(value, pattern, _flags);
         } else {
-            return value + " LIKE '"+simplified+"'";
+            if(!simplified.startsWith("%") && !simplified.endsWith("%")) {
+                if(StringUtils.containsIgnoreCase(_flags,"i")) {
+                    return String.format("lower(%s) = lower('%s')", value, simplified);
+                } else {
+                    return String.format("%s = '%s'", value, simplified);
+                }
+            } else {
+                if(StringUtils.containsIgnoreCase(_flags,"i")) {
+                    return parent.getDialect().getILike(value, "'" + simplified + "'");
+                } else {
+                    return value + " LIKE '"+simplified+"'";
+                }
+            }
         }
 
     }
@@ -805,4 +836,8 @@ public class KiWiSparqlConnection {
     private static enum OPTypes {
         STRING, DOUBLE, INT, DATE, ANY
     }
+    /** Return the database dialect reported by the parent connection (delegates to parent.getDialect()). */
+    public KiWiDialect getDialect() {
+        return parent.getDialect();
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/fa78fa55/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/KiWiDialect.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/KiWiDialect.java b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/KiWiDialect.java
index cbf6fbf..2ab4051 100644
--- a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/KiWiDialect.java
+++ b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/KiWiDialect.java
@@ -23,7 +23,10 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.net.URL;
-import java.util.*;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.Properties;
+import java.util.Set;
 
 /**
  * A dialect provides the SQL statements necessary to access the different types of database systems. Each
@@ -193,12 +196,22 @@ public abstract class KiWiDialect {
     /**
      * Return the database specific operator for matching a text against a regular expression.
      *
+     *
      * @param text
      * @param pattern
+     * @param flags
      * @return
      */
-    public abstract String getRegexp(String text, String pattern);
+    public abstract String getRegexp(String text, String pattern, String flags);
+
 
+    /**
+     * Return true in case the SPARQL RE flags contained in the given string are supported.
+     *
+     * @param flags the SPARQL regular expression flags as a single string; null if no constant flags are given
+     * @return true if a regular expression using these flags is supported by this dialect, false otherwise
+     */
+    public abstract boolean isRegexpSupported(String flags);
 
     /**
      * Return the database specific case insensitive like comparison, e.g. ILIKE in Postgres.

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/fa78fa55/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/h2/H2Dialect.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/h2/H2Dialect.java b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/h2/H2Dialect.java
index 8bbb1a6..98cc88e 100644
--- a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/h2/H2Dialect.java
+++ b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/h2/H2Dialect.java
@@ -17,6 +17,7 @@
  */
 package org.apache.marmotta.kiwi.persistence.h2;
 
+import org.apache.commons.lang3.StringUtils;
 import org.apache.marmotta.kiwi.exception.DriverNotFoundException;
 import org.apache.marmotta.kiwi.persistence.KiWiDialect;
 
@@ -51,8 +52,33 @@ public class H2Dialect extends KiWiDialect {
     }
 
     @Override
-    public String getRegexp(String text, String pattern) {
-        return text + " REGEXP " + pattern;
+    public String getRegexp(String text, String pattern, String flags) {
+        // SPARQL flag "i": emulate case-insensitive matching by lower-casing both operands
+        // NOTE(review): lower() is applied to the pattern expression as well - confirm this is acceptable
+        final boolean ignoreCase = StringUtils.containsIgnoreCase(flags, "i");
+        return ignoreCase ? String.format("lower(%s) REGEXP lower(%s)",text,pattern)
+                          : text + " REGEXP " + pattern;
+    }
+
+    /**
+     * Return true in case the SPARQL RE flags contained in the given string are supported. A flag string
+     * containing "s", "m" or "x" (in either case) is rejected; anything else, including null, is accepted.
+     *
+     * @param flags the SPARQL regular expression flags, may be null
+     * @return false if flags contains one of the rejected flags, true otherwise
+     */
+    @Override
+    public boolean isRegexpSupported(String flags) {
+        // flags for which getRegexp of this dialect does not generate any SQL
+        final String[] rejected = {"s", "m", "x"};
+
+        for(String flag : rejected) {
+            if(StringUtils.containsIgnoreCase(flags, flag)) {
+                return false;
+            }
+        }
+
+        return true;
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/fa78fa55/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/mysql/MySQLDialect.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/mysql/MySQLDialect.java b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/mysql/MySQLDialect.java
index 7914363..6446891 100644
--- a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/mysql/MySQLDialect.java
+++ b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/mysql/MySQLDialect.java
@@ -17,6 +17,7 @@
  */
 package org.apache.marmotta.kiwi.persistence.mysql;
 
+import org.apache.commons.lang3.StringUtils;
 import org.apache.marmotta.kiwi.exception.DriverNotFoundException;
 import org.apache.marmotta.kiwi.persistence.KiWiDialect;
 
@@ -62,10 +63,36 @@ public class MySQLDialect extends KiWiDialect {
     }
 
     @Override
-    public String getRegexp(String text, String pattern) {
-        return text + " RLIKE " + pattern;
+    public String getRegexp(String text, String pattern, String flags) {
+        // without the "i" flag a plain RLIKE comparison is generated
+        if(!StringUtils.containsIgnoreCase(flags,"i")) {
+            return text + " RLIKE " + pattern;
+        }
+        return String.format("lower(%s) RLIKE lower(%s)",text,pattern);
+    }
+
+    /**
+     * Return true in case the SPARQL RE flags contained in the given string are supported.
+     *
+     * <p>A flag string containing "s", "m" or "x" (in either case) is rejected; anything else, including
+     * null, is accepted.
+     *
+     * @param flags the SPARQL regular expression flags, may be null
+     * @return false if flags contains one of the rejected flags, true otherwise
+     */
+    @Override
+    public boolean isRegexpSupported(String flags) {
+        // getRegexp of this dialect only handles the "i" flag; the flags "s", "m" and "x"
+        // are therefore reported as unsupported
+        final boolean hasS = StringUtils.containsIgnoreCase(flags,"s");
+        final boolean hasM = StringUtils.containsIgnoreCase(flags,"m");
+        final boolean hasX = StringUtils.containsIgnoreCase(flags,"x");
+
+        // supported only if none of the three flags is present
+        return !(hasS || hasM || hasX);
     }
 
+
     @Override
     public String getILike(String text, String pattern) {
         return "lower("+text+") LIKE lower("+pattern+")";

http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/fa78fa55/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/pgsql/PostgreSQLDialect.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/pgsql/PostgreSQLDialect.java b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/pgsql/PostgreSQLDialect.java
index 4110309..ac51678 100644
--- a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/pgsql/PostgreSQLDialect.java
+++ b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/persistence/pgsql/PostgreSQLDialect.java
@@ -17,6 +17,7 @@
  */
 package org.apache.marmotta.kiwi.persistence.pgsql;
 
+import org.apache.commons.lang3.StringUtils;
 import org.apache.marmotta.kiwi.exception.DriverNotFoundException;
 import org.apache.marmotta.kiwi.persistence.KiWiDialect;
 
@@ -52,8 +53,37 @@ public class PostgreSQLDialect extends KiWiDialect {
     }
 
     @Override
-    public String getRegexp(String text, String pattern) {
-        return text + " ~ " + pattern;
+    public String getRegexp(String text, String pattern, String flags) {
+        // The pattern is spliced into the statement as an SQL expression (see the flag-less branch),
+        // so an embedded "(?i)" option cannot be written in front of it: that would produce invalid
+        // SQL such as "x ~ (?i)'abc'". PostgreSQL provides a dedicated operator for case-insensitive
+        // regular expression matching, which is used instead.
+        if(StringUtils.containsIgnoreCase(flags,"i")) {
+            // "~*" matches the regular expression case-insensitively
+            return text + " ~* " + pattern;
+        }
+        return text + " ~ " + pattern;
+    }
+
+    /**
+     * Return true in case the SPARQL RE flags contained in the given string are supported.
+     *
+     * @param flags the SPARQL regular expression flags, may be null
+     * @return false if flags contains "s", "m" or "x" (ignoring case), true otherwise
+     */
+    @Override
+    public boolean isRegexpSupported(String flags) {
+        final boolean supported;
+        if(StringUtils.containsIgnoreCase(flags, "s")) {
+            supported = false;
+        } else if(StringUtils.containsIgnoreCase(flags,"m")) {
+            supported = false;
+        } else if(StringUtils.containsIgnoreCase(flags,"x")) {
+            supported = false;
+        } else {
+            supported = true;
+        }
+        return supported;
     }
 
     @Override