You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2014/04/16 11:51:41 UTC

svn commit: r1587849 - in /stanbol/trunk/entityhub: ./ generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/ query/sparql/src/main/java/org/apache/stanbol/entityhub/query/sparql/ query/sparql/src/test/java/org/apache/stanbol/...

Author: rwesten
Date: Wed Apr 16 09:51:40 2014
New Revision: 1587849

URL: http://svn.apache.org/r1587849
Log:
merged fix for STANBOL-1277 to trunk

Modified:
    stanbol/trunk/entityhub/   (props changed)
    stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/PatternUtils.java
    stanbol/trunk/entityhub/query/sparql/src/main/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtils.java
    stanbol/trunk/entityhub/query/sparql/src/test/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtilsTest.java

Propchange: stanbol/trunk/entityhub/
------------------------------------------------------------------------------
  Merged /stanbol/branches/release-0.12/entityhub:r1587844

Modified: stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/PatternUtils.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/PatternUtils.java?rev=1587849&r1=1587848&r2=1587849&view=diff
==============================================================================
--- stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/PatternUtils.java (original)
+++ stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/PatternUtils.java Wed Apr 16 09:51:40 2014
@@ -34,6 +34,31 @@ public final class PatternUtils {
         if(strict){
             regex.append('^');
         }
+        encodeWildcard(wildcard, regex);
+        if(strict){
+            regex.append('$');
+        }
+        return regex.toString();
+    }
+
+    /**
+     * Converts a Wildcard search string to REGEX matching whole words in 
+     * the text. 
+     * @param wildcard the wildcard pattern
+     * @return the regex pattern for the parsed wildcard
+     * @since 0.12.1
+     */
+    public static String wildcardWordToRegex(String wildcard){
+        StringBuilder regex = new StringBuilder("\\b");
+        encodeWildcard(wildcard, regex);
+        return regex.append("\\b").toString();
+    }
+    /**
+     * Internally used to convert a wildcard to a regex
+     * @param wildcard
+     * @param regex
+     */
+    private static void encodeWildcard(String wildcard, StringBuilder regex) {
         for (char c : wildcard.toCharArray()) {
             switch(c) {
                 case '*':
@@ -52,28 +77,49 @@ public final class PatternUtils {
                     break;
             }
         }
-        if(strict){
-            regex.append('$');
-        }
-        return regex.toString();
     }
+    
     public static String value2Regex(String value){
         return '^'+escapeRegex(value)+'$';
     }
-    public static String escapeRegex(String wildcard){
-        StringBuilder escaped = new StringBuilder();
-        for (char c : wildcard.toCharArray()) {
+    /**
+     * Creates a regex that matches values against whole words
+     * (<code>\b{value}\b</code>)
+     * @param word the word to match
+     * @return the regex to match words
+     * @since 0.12.1
+     */
+    public static String word2Regex(String word){
+        return escapeRegex(word, new StringBuilder("\\b")).append("\\b").toString();
+    }
+    
+    public static String escapeRegex(String value){
+        return escapeRegex(value, null).toString();
+    }
+    /**
+     * Escapes regex special characters in the value by prefixing them with a backslash.
+     * @param value the value to escape
+     * @param sb the {@link StringBuilder} or <code>null</code> if a new 
+     * instance should be created
+     * @return the passed (or newly created) {@link StringBuilder} with the escaped value appended.
+     * @since 0.12.1
+     */
+    public static StringBuilder escapeRegex(String value, StringBuilder sb){
+        if(sb == null){
+            sb = new StringBuilder();
+        }
+        for (char c : value.toCharArray()) {
             switch(c) {
                 case '*': case '?': case '(': case ')': case '[': case ']':
                 case '$': case '^': case '.': case '{': case '}': case '|':
                 case '\\':
-                    escaped.append("\\"); //add the escape char
+                    sb.append("\\"); //add the escape char
                 default:
-                    escaped.append(c); //add the char
+                    sb.append(c); //add the char
                     break;
             }
         }
-        return escaped.toString();
+        return sb;
     }
     public static final Pattern PREFIX_REGEX_PATTERN = Pattern.compile("[\\?\\*]");
     /**

Modified: stanbol/trunk/entityhub/query/sparql/src/main/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtils.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/query/sparql/src/main/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtils.java?rev=1587849&r1=1587848&r2=1587849&view=diff
==============================================================================
--- stanbol/trunk/entityhub/query/sparql/src/main/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtils.java (original)
+++ stanbol/trunk/entityhub/query/sparql/src/main/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtils.java Wed Apr 16 09:51:40 2014
@@ -23,7 +23,6 @@ import java.math.BigInteger;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -60,7 +59,7 @@ public final class SparqlQueryUtils {
 
     private static final Logger log = LoggerFactory.getLogger(SparqlQueryUtils.class);
 
-    private static final String XSD_DATE_TIME = "http://www.w3.org/2001/XMLSchema#dateTime";
+    //private static final String XSD_DATE_TIME = "http://www.w3.org/2001/XMLSchema#dateTime";
     //private static final DateFormat DATE_FORMAT = new W3CDateFormat();
 
     private SparqlQueryUtils() {}
@@ -757,36 +756,39 @@ public final class SparqlQueryUtils {
                     queryString.append(" \n").append(intend).append("  FILTER(");
                     filterAdded = true;
                     if (constraint.getPatternType() == PatternType.none) {
-                        if (constraint.isCaseSensitive()) {
-                            boolean first = true;
-                            if(constraint.getTexts().size() > 1){
-                                queryString.append('('); //start language filter group (STANBOL-1204)
-                            }
-                            for (String textConstraint : constraint.getTexts()) {
-                                if (first) {
-                                    first = false;
-                                } else {
-                                    queryString.append(" || ");
-                                }
-                                if (textConstraint != null && !textConstraint.isEmpty()) {
-                                    queryString.append("(str(").append(var).append(") = \"");
-                                    addGrammarEscapedValue(queryString, textConstraint);
-                                    queryString.append("\")");
-                                }
-                            }
-                            if(constraint.getTexts().size() > 1){
-                                queryString.append(')'); //end language filter group (STANBOL-1204)
-                            }
-                        } else {
-                            Collection<String> regexQueryTexts = new ArrayList<String>(
-                                    constraint.getTexts().size());
-                            for (String textConstraint : constraint.getTexts()) {
-                                if (textConstraint != null && !textConstraint.isEmpty()) {
-                                    regexQueryTexts.add(PatternUtils.value2Regex(textConstraint));
-                                }
+                        //as we also want to match single words in labels
+                        //we need to use regex instead of string matching
+                        //for case sensitive matches as well (STANBOL-1277)
+//                        if (constraint.isCaseSensitive()) {
+//                            boolean first = true;
+//                            if(constraint.getTexts().size() > 1){
+//                                queryString.append('('); //start language filter group (STANBOL-1204)
+//                            }
+//                            for (String textConstraint : constraint.getTexts()) {
+//                                if (first) {
+//                                    first = false;
+//                                } else {
+//                                    queryString.append(" || ");
+//                                }
+//                                if (textConstraint != null && !textConstraint.isEmpty()) {
+//                                    queryString.append("(str(").append(var).append(") = \"");
+//                                    addGrammarEscapedValue(queryString, textConstraint);
+//                                    queryString.append("\")");
+//                                }
+//                            }
+//                            if(constraint.getTexts().size() > 1){
+//                                queryString.append(')'); //end language filter group (STANBOL-1204)
+//                            }
+//                        } else {
+                        Collection<String> regexQueryTexts = new ArrayList<String>(
+                                constraint.getTexts().size());
+                        for (String textConstraint : constraint.getTexts()) {
+                            if (textConstraint != null && !textConstraint.isEmpty()) {
+                                regexQueryTexts.add(PatternUtils.word2Regex(textConstraint));
                             }
-                            addRegexFilter(queryString, var, regexQueryTexts, constraint.isCaseSensitive());
                         }
+                        addRegexFilter(queryString, var, regexQueryTexts, constraint.isCaseSensitive());
+//                        }
                     } else if (constraint.getPatternType() == PatternType.wildcard) {
                         // parse false, because that is more in line with the
                         // expectations of users!
@@ -794,7 +796,7 @@ public final class SparqlQueryUtils {
                                 .size());
                         for (String textConstraint : constraint.getTexts()) {
                             if (textConstraint != null && !textConstraint.isEmpty()) {
-                                regexQueryTexts.add(PatternUtils.wildcardToRegex(textConstraint, false));
+                                regexQueryTexts.add(PatternUtils.wildcardWordToRegex(textConstraint));
                             }
                         }
                         addRegexFilter(queryString, var, regexQueryTexts, constraint.isCaseSensitive());

Modified: stanbol/trunk/entityhub/query/sparql/src/test/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtilsTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/query/sparql/src/test/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtilsTest.java?rev=1587849&r1=1587848&r2=1587849&view=diff
==============================================================================
--- stanbol/trunk/entityhub/query/sparql/src/test/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtilsTest.java (original)
+++ stanbol/trunk/entityhub/query/sparql/src/test/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtilsTest.java Wed Apr 16 09:51:40 2014
@@ -33,6 +33,7 @@ import org.apache.stanbol.entityhub.serv
 import org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory;
 import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
 import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint.PatternType;
+import org.junit.Assert;
 import org.junit.Test;
 
 public class SparqlQueryUtilsTest {
@@ -104,6 +105,34 @@ public class SparqlQueryUtilsTest {
     	assertTrue(queryRegex.contains(testString.replaceAll("\\\"", "\\\\\"")));
     }
 
-	
+    /**
+     * Tests word level matching for {@link TextConstraint}s (STANBOL-1277)
+     */
+    @Test
+	public void testMultiWordTextConstraints(){
+        //queries for a TextConstraint with {text1} or {text2} in the languages
+        // {lang1} or {lang2} are expected to look like:
+        //
+        //    select ?entity, ?label where {
+        //        ?entity rdfs:label ?label
+        //        FILTER((regex(str(?label),"\\b{text1}\\b","i") || regex(str(?label),"\\b{text2}\\b","i")) 
+        //            && ((lang(?label) = "{lang1}") || (lang(?label) = "{lang2}"))) . 
+        //    }
+        
+        //first test a pattern type NONE
+        SparqlFieldQuery query = SparqlFieldQueryFactory.getInstance().createFieldQuery();
+        query.setConstraint("urn:field4", new TextConstraint(Arrays.asList("Global","Toy"), PatternType.none, false, "en", null));
+        String queryString = SparqlQueryUtils.createSparqlSelectQuery(query, true, 0, SparqlEndpointTypeEnum.Standard);
+        Assert.assertTrue(queryString.contains("regex(str(?tmp1),\"\\\\bGlobal\\\\b\",\"i\") "
+            + "|| regex(str(?tmp1),\"\\\\bToy\\\\b\",\"i\")"));
+
+        //also test for pattern type WILDCARD
+        query = SparqlFieldQueryFactory.getInstance().createFieldQuery();
+        query.setConstraint("urn:field4", new TextConstraint(Arrays.asList("Glo?al","Toy"), PatternType.wildcard, false, "en", null));
+        queryString = SparqlQueryUtils.createSparqlSelectQuery(query, true, 0, SparqlEndpointTypeEnum.Standard);
+        Assert.assertTrue(queryString.contains("regex(str(?tmp1),\"\\\\bGlo.al\\\\b\",\"i\") "
+            + "|| regex(str(?tmp1),\"\\\\bToy\\\\b\",\"i\")"));
+
+    }
 
 }