You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2014/04/16 11:51:41 UTC
svn commit: r1587849 - in /stanbol/trunk/entityhub: ./
generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/
query/sparql/src/main/java/org/apache/stanbol/entityhub/query/sparql/
query/sparql/src/test/java/org/apache/stanbol/...
Author: rwesten
Date: Wed Apr 16 09:51:40 2014
New Revision: 1587849
URL: http://svn.apache.org/r1587849
Log:
merged fix for STANBOL-1277 to trunk
Modified:
stanbol/trunk/entityhub/ (props changed)
stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/PatternUtils.java
stanbol/trunk/entityhub/query/sparql/src/main/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtils.java
stanbol/trunk/entityhub/query/sparql/src/test/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtilsTest.java
Propchange: stanbol/trunk/entityhub/
------------------------------------------------------------------------------
Merged /stanbol/branches/release-0.12/entityhub:r1587844
Modified: stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/PatternUtils.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/PatternUtils.java?rev=1587849&r1=1587848&r2=1587849&view=diff
==============================================================================
--- stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/PatternUtils.java (original)
+++ stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/PatternUtils.java Wed Apr 16 09:51:40 2014
@@ -34,6 +34,31 @@ public final class PatternUtils {
if(strict){
regex.append('^');
}
+ encodeWildcard(wildcard, regex);
+ if(strict){
+ regex.append('$');
+ }
+ return regex.toString();
+ }
+
+ /**
+ * Converts a Wildcard search string to REGEX matching whole words in
+ * the text.
+ * @param wildcard the wildcard pattern
+ * @return the regex pattern for the passed wildcard
+ * @since 0.12.1
+ */
+ public static String wildcardWordToRegex(String wildcard){
+ StringBuilder regex = new StringBuilder("\\b");
+ encodeWildcard(wildcard, regex);
+ return regex.append("\\b").toString();
+ }
+ /**
+ * Internally used to convert a wildcard to a regex
+ * @param wildcard the wildcard pattern to convert
+ * @param regex the {@link StringBuilder} the resulting regex is appended to
+ */
+ private static void encodeWildcard(String wildcard, StringBuilder regex) {
for (char c : wildcard.toCharArray()) {
switch(c) {
case '*':
@@ -52,28 +77,49 @@ public final class PatternUtils {
break;
}
}
- if(strict){
- regex.append('$');
- }
- return regex.toString();
}
+
public static String value2Regex(String value){
return '^'+escapeRegex(value)+'$';
}
- public static String escapeRegex(String wildcard){
- StringBuilder escaped = new StringBuilder();
- for (char c : wildcard.toCharArray()) {
+ /**
+ * Creates a regex that matches values against whole words
+ * (<code>\b{value}\b</code>)
+ * @param word the word to match
+ * @return the regex to match words
+ * @since 0.12.1
+ */
+ public static String word2Regex(String word){
+ return escapeRegex(word, new StringBuilder("\\b")).append("\\b").toString();
+ }
+
+ public static String escapeRegex(String value){
+ return escapeRegex(value, null).toString();
+ }
+ /**
+ * Escapes regex special characters in the value and appends the result to a builder.
+ * @param value the value to escape
+ * @param sb the {@link StringBuilder} or <code>null</code> if a new
+ * instance should be created
+ * @return the passed (or newly created) {@link StringBuilder} with the escaped value added.
+ * @since 0.12.1
+ */
+ public static StringBuilder escapeRegex(String value, StringBuilder sb){
+ if(sb == null){
+ sb = new StringBuilder();
+ }
+ for (char c : value.toCharArray()) {
switch(c) {
case '*': case '?': case '(': case ')': case '[': case ']':
case '$': case '^': case '.': case '{': case '}': case '|':
case '\\':
- escaped.append("\\"); //add the escape char
+ sb.append("\\"); //add the escape char
default:
- escaped.append(c); //add the char
+ sb.append(c); //add the char
break;
}
}
- return escaped.toString();
+ return sb;
}
public static final Pattern PREFIX_REGEX_PATTERN = Pattern.compile("[\\?\\*]");
/**
Modified: stanbol/trunk/entityhub/query/sparql/src/main/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtils.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/query/sparql/src/main/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtils.java?rev=1587849&r1=1587848&r2=1587849&view=diff
==============================================================================
--- stanbol/trunk/entityhub/query/sparql/src/main/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtils.java (original)
+++ stanbol/trunk/entityhub/query/sparql/src/main/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtils.java Wed Apr 16 09:51:40 2014
@@ -23,7 +23,6 @@ import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
-import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@@ -60,7 +59,7 @@ public final class SparqlQueryUtils {
private static final Logger log = LoggerFactory.getLogger(SparqlQueryUtils.class);
- private static final String XSD_DATE_TIME = "http://www.w3.org/2001/XMLSchema#dateTime";
+ //private static final String XSD_DATE_TIME = "http://www.w3.org/2001/XMLSchema#dateTime";
//private static final DateFormat DATE_FORMAT = new W3CDateFormat();
private SparqlQueryUtils() {}
@@ -757,36 +756,39 @@ public final class SparqlQueryUtils {
queryString.append(" \n").append(intend).append(" FILTER(");
filterAdded = true;
if (constraint.getPatternType() == PatternType.none) {
- if (constraint.isCaseSensitive()) {
- boolean first = true;
- if(constraint.getTexts().size() > 1){
- queryString.append('('); //start language filter group (STANBOL-1204)
- }
- for (String textConstraint : constraint.getTexts()) {
- if (first) {
- first = false;
- } else {
- queryString.append(" || ");
- }
- if (textConstraint != null && !textConstraint.isEmpty()) {
- queryString.append("(str(").append(var).append(") = \"");
- addGrammarEscapedValue(queryString, textConstraint);
- queryString.append("\")");
- }
- }
- if(constraint.getTexts().size() > 1){
- queryString.append(')'); //end language filter group (STANBOL-1204)
- }
- } else {
- Collection<String> regexQueryTexts = new ArrayList<String>(
- constraint.getTexts().size());
- for (String textConstraint : constraint.getTexts()) {
- if (textConstraint != null && !textConstraint.isEmpty()) {
- regexQueryTexts.add(PatternUtils.value2Regex(textConstraint));
- }
+ //as we also want to match single words within labels
+ //we need to use regex instead of string matching even
+ //for case sensitive matches (STANBOL-1277)
+// if (constraint.isCaseSensitive()) {
+// boolean first = true;
+// if(constraint.getTexts().size() > 1){
+// queryString.append('('); //start language filter group (STANBOL-1204)
+// }
+// for (String textConstraint : constraint.getTexts()) {
+// if (first) {
+// first = false;
+// } else {
+// queryString.append(" || ");
+// }
+// if (textConstraint != null && !textConstraint.isEmpty()) {
+// queryString.append("(str(").append(var).append(") = \"");
+// addGrammarEscapedValue(queryString, textConstraint);
+// queryString.append("\")");
+// }
+// }
+// if(constraint.getTexts().size() > 1){
+// queryString.append(')'); //end language filter group (STANBOL-1204)
+// }
+// } else {
+ Collection<String> regexQueryTexts = new ArrayList<String>(
+ constraint.getTexts().size());
+ for (String textConstraint : constraint.getTexts()) {
+ if (textConstraint != null && !textConstraint.isEmpty()) {
+ regexQueryTexts.add(PatternUtils.word2Regex(textConstraint));
}
- addRegexFilter(queryString, var, regexQueryTexts, constraint.isCaseSensitive());
}
+ addRegexFilter(queryString, var, regexQueryTexts, constraint.isCaseSensitive());
+// }
} else if (constraint.getPatternType() == PatternType.wildcard) {
// parse false, because that is more in line with the
// expectations of users!
@@ -794,7 +796,7 @@ public final class SparqlQueryUtils {
.size());
for (String textConstraint : constraint.getTexts()) {
if (textConstraint != null && !textConstraint.isEmpty()) {
- regexQueryTexts.add(PatternUtils.wildcardToRegex(textConstraint, false));
+ regexQueryTexts.add(PatternUtils.wildcardWordToRegex(textConstraint));
}
}
addRegexFilter(queryString, var, regexQueryTexts, constraint.isCaseSensitive());
Modified: stanbol/trunk/entityhub/query/sparql/src/test/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtilsTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/query/sparql/src/test/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtilsTest.java?rev=1587849&r1=1587848&r2=1587849&view=diff
==============================================================================
--- stanbol/trunk/entityhub/query/sparql/src/test/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtilsTest.java (original)
+++ stanbol/trunk/entityhub/query/sparql/src/test/java/org/apache/stanbol/entityhub/query/sparql/SparqlQueryUtilsTest.java Wed Apr 16 09:51:40 2014
@@ -33,6 +33,7 @@ import org.apache.stanbol.entityhub.serv
import org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory;
import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint.PatternType;
+import org.junit.Assert;
import org.junit.Test;
public class SparqlQueryUtilsTest {
@@ -104,6 +105,34 @@ public class SparqlQueryUtilsTest {
assertTrue(queryRegex.contains(testString.replaceAll("\\\"", "\\\\\"")));
}
-
+ /**
+ * Tests word level matching for {@link TextConstraint}s (STANBOL-1277)
+ */
+ @Test
+ public void testMultiWordTextConstraints(){
+ //queries for a TextConstraint with {text1} or {text2} in the languages
+ // {lang1} or {lang2} are expected to look like:
+ //
+ // select ?entity, ?label where {
+ // ?entity rdfs:label ?label
+ // FILTER((regex(str(?label),"\\b{text1}\\b","i") || regex(str(?label),"\\b{text2}\\b","i"))
+ // && ((lang(?label) = "{lang1}") || (lang(?label) = "{lang2}"))) .
+ // }
+
+ //first test a pattern type NONE
+ SparqlFieldQuery query = SparqlFieldQueryFactory.getInstance().createFieldQuery();
+ query.setConstraint("urn:field4", new TextConstraint(Arrays.asList("Global","Toy"), PatternType.none, false, "en", null));
+ String queryString = SparqlQueryUtils.createSparqlSelectQuery(query, true, 0, SparqlEndpointTypeEnum.Standard);
+ Assert.assertTrue(queryString.contains("regex(str(?tmp1),\"\\\\bGlobal\\\\b\",\"i\") "
+ + "|| regex(str(?tmp1),\"\\\\bToy\\\\b\",\"i\")"));
+
+ //also test for pattern type WILDCARD
+ query = SparqlFieldQueryFactory.getInstance().createFieldQuery();
+ query.setConstraint("urn:field4", new TextConstraint(Arrays.asList("Glo?al","Toy"), PatternType.wildcard, false, "en", null));
+ queryString = SparqlQueryUtils.createSparqlSelectQuery(query, true, 0, SparqlEndpointTypeEnum.Standard);
+ Assert.assertTrue(queryString.contains("regex(str(?tmp1),\"\\\\bGlo.al\\\\b\",\"i\") "
+ + "|| regex(str(?tmp1),\"\\\\bToy\\\\b\",\"i\")"));
+
+ }
}