Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/05/11 15:13:43 UTC

svn commit: r1337141 - in /incubator/stanbol/trunk: commons/solr/core/ commons/web/base/src/main/java/org/apache/stanbol/commons/web/base/resource/ entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ entityhub/jersey/src/main/ja...

Author: rwesten
Date: Fri May 11 13:13:42 2012
New Revision: 1337141

URL: http://svn.apache.org/viewvc?rev=1337141&view=rev
Log:
fixes STANBOL-606, STANBOL-607

* the commons.solr.core module now also exports the Apache Lucene packages.
* this allows the SolrYard to use the TokenStream API, which improves the reliability of tokenizing query values in the QueryUtils class as described by STANBOL-607
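
A minimal sketch of the Tokenizer/TokenStream usage this enables (it condenses the
parseWildcardQueryTerms(..) helper added to QueryUtils below; the ICUTokenizerFactory
and OffsetAttribute calls follow the Solr 3.x analysis API imported by that class, so
treat it as illustration rather than as part of this commit):

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
    import org.apache.solr.analysis.ICUTokenizerFactory;
    import org.apache.solr.analysis.TokenizerFactory;

    public class TokenizeSketch {
        public static void main(String[] args) throws IOException {
            TokenizerFactory factory = new ICUTokenizerFactory();
            Tokenizer tokenizer = factory.create(new StringReader("te?t for multi* tokens"));
            while (tokenizer.incrementToken()) {
                // only the start/end offsets of each token are of interest here
                OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
                System.out.println(offset.startOffset() + ".." + offset.endOffset());
            }
        }
    }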

implements STANBOL-598, STANBOL-599

* added a getRequestUri() method to the BaseStanbolResource to make it easier to access the URI of the requested resource
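
A minimal sketch of how the new accessor can be used from a resource (the resource
class and path below are hypothetical; FreeMarker templates can reach the same value
as ${it.requestUri}, as done in the reconciliation template of this commit):

    import java.net.URI;

    import javax.ws.rs.GET;
    import javax.ws.rs.Path;

    import org.apache.stanbol.commons.web.base.resource.BaseStanbolResource;

    @Path("/example") // hypothetical path
    public class ExampleResource extends BaseStanbolResource {

        @GET
        public String info() {
            // absolute path of the currently requested resource
            URI requestUri = getRequestUri();
            return "Requested resource: " + requestUri;
        }
    }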

Added:
    incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileProperty.java   (with props)
    incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileValue.java   (with props)
    incubator/stanbol/trunk/entityhub/jersey/src/main/resources/org/apache/stanbol/entityhub/jersey/static/images/google_refine_reconciliation-similarityexample.png   (with props)
Modified:
    incubator/stanbol/trunk/commons/solr/core/pom.xml
    incubator/stanbol/trunk/commons/web/base/src/main/java/org/apache/stanbol/commons/web/base/resource/BaseStanbolResource.java
    incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileQuery.java
    incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/resource/reconcile/BaseGoogleRefineReconcileResource.java
    incubator/stanbol/trunk/entityhub/jersey/src/main/resources/org/apache/stanbol/entityhub/jersey/templates/org/apache/stanbol/entityhub/jersey/resource/reconcile/BaseGoogleRefineReconcileResource/index.ftl
    incubator/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LangEncoder.java
    incubator/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/query/QueryUtils.java
    incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/entityhub/it/query/DbpediaQueryTest.java

Modified: incubator/stanbol/trunk/commons/solr/core/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/commons/solr/core/pom.xml?rev=1337141&r1=1337140&r2=1337141&view=diff
==============================================================================
--- incubator/stanbol/trunk/commons/solr/core/pom.xml (original)
+++ incubator/stanbol/trunk/commons/solr/core/pom.xml Fri May 11 13:13:42 2012
@@ -79,7 +79,8 @@
               org.apache.solr.search.*,
              org.apache.solr.update.*,
               org.apache.solr.util.*,
-              org.apache.solr.analysis
+              org.apache.solr.analysis,
+              org.apache.lucene.*,
             </Export-Package>
             <!-- NOTE: 
                * The exclusion of javax.xml.stream assumes that only serialised

Modified: incubator/stanbol/trunk/commons/web/base/src/main/java/org/apache/stanbol/commons/web/base/resource/BaseStanbolResource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/commons/web/base/src/main/java/org/apache/stanbol/commons/web/base/resource/BaseStanbolResource.java?rev=1337141&r1=1337140&r2=1337141&view=diff
==============================================================================
--- incubator/stanbol/trunk/commons/web/base/src/main/java/org/apache/stanbol/commons/web/base/resource/BaseStanbolResource.java (original)
+++ incubator/stanbol/trunk/commons/web/base/src/main/java/org/apache/stanbol/commons/web/base/resource/BaseStanbolResource.java Fri May 11 13:13:42 2012
@@ -52,6 +52,10 @@ public class BaseStanbolResource {
     @Context
     protected ServletContext servletContext;
 
+    public URI getRequestUri(){
+        return uriInfo.getAbsolutePath();
+    }
+    
     public URI getPublicBaseUri() {
         return uriInfo.getBaseUri();
     }

Added: incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileProperty.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileProperty.java?rev=1337141&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileProperty.java (added)
+++ incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileProperty.java Fri May 11 13:13:42 2012
@@ -0,0 +1,163 @@
+package org.apache.stanbol.entityhub.jersey.grefine;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Special properties are used by this Google Refine Reconciliation API
+ * implementation to enable Users to use special features of the Entityhub
+ * 
+ * The Syntax used by those properties is
+ * <code><pre>
+ *     @{propName}[:{propParameter}]
+ * </pre></code>
+ * 
+ * where:<ul>
+ * <li> '@' is the special property indicator
+ * <li> '{propName}' is parsed as the name of the special property
+ * <li> ':' separates the property name from an optional property parameter
+ * <li> '{propParameter}' is an additional parameter for this special property.
+ * The syntax of the parameter is specific to the special property.
+ * </ul>
+ * Both the {propName} and the {propParameter} are trimmed.<p>
+ * @author Rupert Westenthaler
+ *
+ */
+public class ReconcileProperty {
+    
+    private static final Logger log = LoggerFactory.getLogger(ReconcileProperty.class);
+    
+    public static final char SPECIAL_PROPERTY_PREFIX = '@';
+    public static final char SPECAIL_PROPERTY_VALUE_SEPARATOR = ':';
+    private final boolean special;
+    private final String name;
+    private final String parameter;
+    
+    private ReconcileProperty(boolean special, String name, String parameter){
+        this.special = special;
+        this.name = name;
+        this.parameter = parameter;
+    }
+    
+    /**
+     * Tests if the parsed propertyString represents a special property
+     * @param propertyString the property string to check
+     * @return <code>true</code> if the parsed propertyString is not <code>null</code>,
+     * not empty and starts with {@link #SPECIAL_PROPERTY_PREFIX}. Otherwise
+     * <code>false</code>
+     */
+    public static boolean isSpecialProperty(String propertyString){
+        propertyString = StringUtils.trimToNull(propertyString);
+        return propertyString != null && propertyString.length() > 1 && 
+                propertyString.charAt(0) == SPECIAL_PROPERTY_PREFIX;
+    }
+    /**
+     * Parses the Reconcile property from the parsed propertyString
+     * @param propertyString the property string
+     * @return the {@link ReconcileProperty} or <code>null</code> if the parsed
+     * String is illegal formatted.
+     */
+    public static ReconcileProperty parseProperty(String propertyString){
+        propertyString = StringUtils.trimToNull(propertyString);
+        if(propertyString != null){
+            propertyString = StringUtils.trimToNull(propertyString);
+            if(propertyString == null){
+                log.warn("Unable to parse Reconcile Property: The parsed propertyString MUST contain some none trimable chars!");
+            }
+            boolean special = propertyString.charAt(0) == SPECIAL_PROPERTY_PREFIX;
+            if(!special){
+                return new ReconcileProperty(special, NamespaceEnum.getFullName(propertyString), null);
+            } // else parse special property name and parameter
+            if(propertyString.length() < 1){
+                log.warn("Unable to parse Reconcile Property: The parsed propertyString MUST NOT " +
+                        "contain only the special property prefix '{}'!",
+                        SPECIAL_PROPERTY_PREFIX);
+                return null;
+            }
+            int valueSeparatorIndex = propertyString.indexOf(SPECAIL_PROPERTY_VALUE_SEPARATOR);
+            String name = StringUtils.trimToNull(
+                propertyString.substring(1, valueSeparatorIndex > 0 ? 
+                        valueSeparatorIndex : propertyString.length()));
+            if(name == null) {
+                log.warn("Unable to parse Reconcile Property: The parsed special " +
+                		"property '{}' has an empty property name!",propertyString);
+                return null;
+            }
+            return new ReconcileProperty(special, name, 
+                //parse the parameter from the parsed value
+                valueSeparatorIndex > 0 && valueSeparatorIndex < propertyString.length() ?
+                        StringUtils.trimToNull(propertyString.substring(valueSeparatorIndex+1)):null);
+        } else {
+            log.warn("Unable to parse Reconcile Property from NULL or an empty String!");
+            return null;
+        }
+    }
+
+    /**
+     * Getter for the name of the property
+     * @return the name
+     */
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * Getter for the parameter 
+     * @return the value or <code>null</code> if none
+     */
+    public String getParameter() {
+        return parameter;
+    }
+    
+    /**
+     * Checks if this special property has a value or not.
+     * @return if the special property has a value or not.
+     */
+    public boolean hasParameter(){
+        return parameter != null;
+    }
+    /**
+     * If this reconcile property is a special property (starting with an '@')
+     * @return <code>true</code> if this is a special property, otherwise <code>false</code>
+     */
+    public boolean isSpecial(){
+        return special;
+    }
+    
+    @Override
+    public int hashCode() {
+        return name.hashCode()+(parameter != null?parameter.hashCode():0)+
+                (special?1:0);
+    }
+    @Override
+    public boolean equals(Object o) {
+        if(o instanceof ReconcileProperty && name.equals(((ReconcileProperty)o).name)
+                && special == ((ReconcileProperty)o).special){
+            return parameter == null && ((ReconcileProperty)o).parameter == null || (
+                    parameter != null && parameter.equals(((ReconcileProperty)o).parameter));
+        } else {
+            return false;
+        }
+    }
+    /**
+     * Serialises the {@link ReconcileProperty} as defined by the syntax
+     * <code><pre>
+     *     @{propName}[:{propParameter}]
+     * </pre></code>
+     */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        if(special){
+            sb.append(SPECIAL_PROPERTY_PREFIX);
+        }
+        sb.append(name);
+        if(parameter != null){
+            sb.append(SPECAIL_PROPERTY_VALUE_SEPARATOR).append(parameter);
+        }
+        return sb.toString();
+    }
+    
+}

Propchange: incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileProperty.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain
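
A minimal usage sketch for the new ReconcileProperty class (assuming the 'rdfs' and
'dbp-ont' prefixes used below are registered in the NamespaceEnum, as the
documentation added by this commit suggests):

    import org.apache.stanbol.entityhub.jersey.grefine.ReconcileProperty;

    public class ReconcilePropertySketch {
        public static void main(String[] args) {
            // special property with an optional parameter
            ReconcileProperty similarity = ReconcileProperty.parseProperty("@similarity:dbp-ont:abstract");
            System.out.println(similarity.isSpecial());    // true
            System.out.println(similarity.getName());      // "similarity"
            System.out.println(similarity.getParameter()); // "dbp-ont:abstract"

            // normal property: the qname is expanded via NamespaceEnum.getFullName(..)
            ReconcileProperty label = ReconcileProperty.parseProperty("rdfs:label");
            System.out.println(label.isSpecial());         // false
            System.out.println(label.getName());           // full rdfs:label URI
        }
    }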

Modified: incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileQuery.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileQuery.java?rev=1337141&r1=1337140&r2=1337141&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileQuery.java (original)
+++ incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileQuery.java Fri May 11 13:13:42 2012
@@ -41,7 +41,7 @@ import org.slf4j.LoggerFactory;
 /**
  * Java Representation for <a href="http://code.google.com/p/google-refine/wiki/ReconciliationServiceApi#Query_Request">
  * Google Refine Reconciliation queries</a>.<p>
- * {@link #getTypes()} and {@link Value#getId()} do support 'prefix:localname'
+ * {@link #getTypes()} and {@link ReconcileValue#getId()} do support 'prefix:localname'
  * syntax for prefixes defined in the {@link NamespaceEnum}.
  * Also defines methods for parsing single and multiple request strings.
  * 
@@ -51,9 +51,13 @@ import org.slf4j.LoggerFactory;
 public class ReconcileQuery {
 
     private static final Logger log = LoggerFactory.getLogger(ReconcileQuery.class);
-    
+    /**
+     * The default limit for suggestions if not explicitly parsed
+     */
     public static final Integer DEFAULT_LIMIT = 5;
-    
+    /**
+     * The default entity type mode if not explicitly parsed by the query
+     */
     public static final TYPE_STRICT DEFAULT_TYPE_STRICT = TYPE_STRICT.any;
     
     private final String query;
@@ -62,12 +66,10 @@ public class ReconcileQuery {
     
     private Integer limit;
     
-    private final Map<String,Collection<Value>> properties = new HashMap<String,Collection<Value>>();
+    private final Map<ReconcileProperty,Collection<ReconcileValue>> properties = new HashMap<ReconcileProperty,Collection<ReconcileValue>>();
     
     private TYPE_STRICT typeStrict;
-    
-    protected static final ValueFactory vf = InMemoryValueFactory.getInstance();
-    
+       
     /**
      * @return the limit
      */
@@ -132,86 +134,30 @@ public class ReconcileQuery {
         }
     }
     
-    public Collection<Value> putProperty(String field, Collection<Value> values){
+    public Collection<ReconcileValue> putProperty(String field, Collection<ReconcileValue> values){
         if(field == null || field.isEmpty()){
             throw new IllegalArgumentException("The field for an property MUST NOT be NULL!");
         }
-        if(values == null || values.isEmpty()){
-            return properties.remove(values);
+        ReconcileProperty property = ReconcileProperty.parseProperty(field);
+        if(property != null){
+            if(values == null || values.isEmpty()){
+                return properties.remove(values);
+            } else {
+                return properties.put(property, values);
+            }
         } else {
-            return properties.put(field, values);
+            return null;
         }
     }
-    public Collection<Value> removeProperty(String field){
+    public Collection<ReconcileValue> removeProperty(String field){
         return properties.remove(field);
     }
-    public Collection<Value> getProperty(String field){
+    public Collection<ReconcileValue> getProperty(String field){
         return properties.get(field);
     }
-    public Iterable<Entry<String,Collection<Value>>> getProperties(){
+    public Iterable<Entry<ReconcileProperty,Collection<ReconcileValue>>> getProperties(){
         return properties.entrySet();
     }
-    /**
-     * Values can be simple JSON values or JSON objects with an 'id' and a
-     * 'name'. This is mapped to {@link Value} objects with an optional 
-     * {@link #getId()} and a required {@link #getValue()}.<p>
-     * The 'id' supports prefix:localname syntax for prefixes defined within the
-     * {@link NamespaceEnum}
-     * @author Rupert Westenthaler
-     *
-     */
-    public static class Value {
-        private final String id;
-        private final Object value;
-
-        private Value(Object value){
-            this(null,value);
-        }
-        private Value(String id, Object value){
-            this.id = id == null ? null : NamespaceEnum.getFullName(id);
-            if(value == null){
-                throw new IllegalArgumentException("The parsed value MUST NOT be NULL!");
-            }
-            this.value = value;
-        }
-
-        /**
-         * The getter for the value of the 'id' property of the 'v' object
-         * if present. This represents the value of fields that are already
-         * successfully linked (reconciled) with some entity.
-         * @return the id
-         */
-        public String getId() {
-            return id;
-        }
-
-        /**
-         * @return the value
-         */
-        public Object getValue() {
-            return value;
-        }
-        /**
-         * Calls the {@link #toString()} method of the {@link #getValue()}
-         */
-        @Override
-        public String toString() {
-            return value.toString();
-        }
-        @Override
-        public int hashCode() {
-            return id != null ? id.hashCode() : value.hashCode();
-        }
-        @Override
-        public boolean equals(Object o) {
-            return o instanceof Value && ( //other is value
-                    (id != null && id.equals(((Value) o).id)) || //ids are equals or 
-                        (id == null && ((Value)o).id == null //ids are null and
-                        && value.equals(((Value)o).value))); //values are equals
-            
-        }
-    }
-    
     public static Map<String,ReconcileQuery> parseQueries(String queriesString) throws WebApplicationException {
         JSONObject jQueries;
         try {
@@ -335,18 +281,18 @@ public class ReconcileQuery {
     private static void parseProperty(ReconcileQuery reconcileQuery,JSONObject jProperty) {
         if(jProperty != null){
             //parse property
-            String property = NamespaceEnum.getFullName(jProperty.optString("pid"));
+            String property = jProperty.optString("pid");
             if(property == null){
                 log.warn("Ignore Property because of missing 'pid'! \n{}",jProperty.toString());
             } else {
                 //property keys may appear multiple times in queries
                 //so we need to initialise the property values with already
                 //existing values
-                Collection<Value> values = reconcileQuery.getProperty(property);
+                Collection<ReconcileValue> values = reconcileQuery.getProperty(property);
                 if(values == null){ //if not create a new Set
                     //maybe the order is important (e.g. for similarity alg) 
                     //   ... so try to keep it
-                    values = new LinkedHashSet<Value>();
+                    values = new LinkedHashSet<ReconcileValue>();
                 }
                 //parse the value
                 Object jValue = jProperty.opt("v");
@@ -354,7 +300,7 @@ public class ReconcileQuery {
                     log.warn("Ignore Property '{}' because it has no value! \n {}",property,jProperty.toString());
                 } else if(jValue instanceof JSONObject){
                     //Reconciliation data available!
-                    Value value = parseValueFromV(jValue);
+                    ReconcileValue value = parseValueFromV(jValue);
                     if(value != null){
                         values.add(value);
                     } else {
@@ -368,7 +314,7 @@ public class ReconcileQuery {
                         jValue = jValueArray.opt(j);
                         if(jValue instanceof JSONObject){
                             //Reconciliation data available!
-                            Value value = parseValueFromV(jValue);
+                            ReconcileValue value = parseValueFromV(jValue);
                             if(value != null){
                                 values.add(value);
                             } else {
@@ -376,7 +322,7 @@ public class ReconcileQuery {
                                     property,jValue.toString());
                             }
                         } else if(jValue != null){
-                            values.add(new Value(jValue));
+                            values.add(new ReconcileValue(jValue));
                         }
                     }
                     if(values.isEmpty()){
@@ -384,7 +330,7 @@ public class ReconcileQuery {
                             property,jProperty.toString());
                     }
                 } else { //number or String
-                    values.add(new Value(jValue)); //directly use the value
+                    values.add(new ReconcileValue(jValue)); //directly use the value
                 }
                 
                 if(!values.isEmpty()){
@@ -400,10 +346,10 @@ public class ReconcileQuery {
      * @return The value or <code>null</code> if the parsed json object does not
      * contain the required information.
      */
-    private static Value parseValueFromV(Object jValue) {
+    private static ReconcileValue parseValueFromV(Object jValue) {
         String id = ((JSONObject)jValue).optString("id");
         String value = ((JSONObject)jValue).optString("name");
-        return value != null ? new Value(id,value) : null;
+        return value != null ? new ReconcileValue(id,value) : null;
     }
     
 }

Added: incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileValue.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileValue.java?rev=1337141&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileValue.java (added)
+++ incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileValue.java Fri May 11 13:13:42 2012
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.entityhub.jersey.grefine;
+
+import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
+
+/**
+ * Values can be simple JSON values or JSON objects with an 'id' and a
+ * 'name'. This is mapped to {@link ReconcileValue} objects with an optional 
+ * {@link #getId()} and a required {@link #getValue()}.<p>
+ * The 'id' supports prefix:localname syntax for prefixes defined within the
+ * {@link NamespaceEnum}
+ * @author Rupert Westenthaler
+ *
+ */
+public class ReconcileValue {
+    private final String id;
+    private final Object value;
+
+    ReconcileValue(Object value){
+        this(null,value);
+    }
+    ReconcileValue(String id, Object value){
+        this.id = id == null ? null : NamespaceEnum.getFullName(id);
+        if(value == null){
+            throw new IllegalArgumentException("The parsed value MUST NOT be NULL!");
+        }
+        this.value = value;
+    }
+
+    /**
+     * The getter for the value of the 'id' property of the 'v' object
+     * if present. This represents the value of fields that are already
+     * successfully linked (reconciled) with some entity.
+     * @return the id
+     */
+    public String getId() {
+        return id;
+    }
+
+    /**
+     * @return the value
+     */
+    public Object getValue() {
+        return value;
+    }
+    /**
+     * Calls the {@link #toString()} method of the {@link #getValue()}
+     */
+    @Override
+    public String toString() {
+        return value.toString();
+    }
+    @Override
+    public int hashCode() {
+        return id != null ? id.hashCode() : value.hashCode();
+    }
+    @Override
+    public boolean equals(Object o) {
+        return o instanceof ReconcileValue && ( //other is value
+                (id != null && id.equals(((ReconcileValue) o).id)) || //ids are equals or 
+                    (id == null && ((ReconcileValue)o).id == null //ids are null and
+                    && value.equals(((ReconcileValue)o).value))); //values are equals
+        
+    }
+}
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/grefine/ReconcileValue.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain
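
A minimal sketch of the two JSON 'v' shapes that ReconcileQuery maps to ReconcileValue
instances (the ReconcileValue constructors themselves are package private; the jettison
JSONObject below only illustrates the already-reconciled shape, and the 'dbpedia' prefix
is assumed to be registered in the NamespaceEnum):

    import org.codehaus.jettison.json.JSONException;
    import org.codehaus.jettison.json.JSONObject;

    public class ReconcileValueShapes {
        public static void main(String[] args) throws JSONException {
            // plain JSON value -> ReconcileValue with getId() == null
            Object plain = "William Gibson";

            // already reconciled cell -> ReconcileValue with an 'id' and a 'name';
            // the 'id' may use prefix:localname syntax (expanded via NamespaceEnum)
            JSONObject reconciled = new JSONObject();
            reconciled.put("id", "dbpedia:William_Gibson");
            reconciled.put("name", "William Gibson");

            System.out.println(plain + " / " + reconciled);
        }
    }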

Modified: incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/resource/reconcile/BaseGoogleRefineReconcileResource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/resource/reconcile/BaseGoogleRefineReconcileResource.java?rev=1337141&r1=1337140&r2=1337141&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/resource/reconcile/BaseGoogleRefineReconcileResource.java (original)
+++ incubator/stanbol/trunk/entityhub/jersey/src/main/java/org/apache/stanbol/entityhub/jersey/resource/reconcile/BaseGoogleRefineReconcileResource.java Fri May 11 13:13:42 2012
@@ -25,8 +25,10 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -47,19 +49,23 @@ import javax.ws.rs.core.Response.Respons
 import org.apache.stanbol.commons.web.base.CorsHelper;
 import org.apache.stanbol.commons.web.base.resource.BaseStanbolResource;
 import org.apache.stanbol.commons.web.base.utils.MediaTypeUtil;
+import org.apache.stanbol.entityhub.jersey.grefine.ReconcileProperty;
 import org.apache.stanbol.entityhub.jersey.grefine.ReconcileQuery;
+import org.apache.stanbol.entityhub.jersey.grefine.ReconcileValue;
 import org.apache.stanbol.entityhub.jersey.grefine.Utils;
-import org.apache.stanbol.entityhub.jersey.grefine.ReconcileQuery.Value;
 import org.apache.stanbol.entityhub.servicesapi.EntityhubException;
 import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
+import org.apache.stanbol.entityhub.servicesapi.defaults.SpecialFieldEnum;
 import org.apache.stanbol.entityhub.servicesapi.model.Reference;
 import org.apache.stanbol.entityhub.servicesapi.model.Representation;
 import org.apache.stanbol.entityhub.servicesapi.model.Text;
 import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
 import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
 import org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint;
+import org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint;
 import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
 import org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint;
+import org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint.MODE;
 import org.apache.stanbol.entityhub.servicesapi.site.ReferencedSiteException;
 import org.apache.stanbol.entityhub.servicesapi.util.ModelUtils;
 import org.codehaus.jettison.json.JSONArray;
@@ -88,6 +94,18 @@ public abstract class BaseGoogleRefineRe
     private static final Collection<String> SELECTED_FIELDS = Collections.unmodifiableList(
         Arrays.asList(NAME_FIELD,TYPE_FIELD));
 
+    private static final Comparator<JSONObject> resultScoreComparator = new Comparator<JSONObject>() {
+
+        @Override
+        public int compare(JSONObject o1, JSONObject o2) {
+            try {
+                return Double.compare(o2.getDouble("score"),o1.getDouble("score"));
+            } catch (JSONException e) {
+                throw new IllegalStateException(e);
+            }
+        }
+        
+    };
 
     protected BaseGoogleRefineReconcileResource(){
         super();
@@ -186,7 +204,7 @@ public abstract class BaseGoogleRefineRe
         addPropertyConstraints(rQuery, query);
         query.setLimit(query.getLimit());
         QueryResultList<Representation> results = performQuery(query);
-        JSONArray jResultList = new JSONArray();
+        List<JSONObject> jResultList = new ArrayList<JSONObject>(results.size());
         //we need to know the highest score to normalise between [0..1]
         double maxQueryScore = -1;
         if(!results.isEmpty()){
@@ -221,11 +239,13 @@ public abstract class BaseGoogleRefineRe
                 normalisedScore = normalisedScore*similarity/maxQueryScore;
                 jResult.put("score", normalisedScore);
                 jResult.put("match", similarity >= 0);
-                jResultList.put(jResult);
+                jResultList.add(jResult);
             }
         } //else no results ... nothing todo
+        //sort results based on score
+        Collections.sort(jResultList, resultScoreComparator);
         JSONObject jResultContainer = new JSONObject();
-        jResultContainer.put("result", jResultList);
+        jResultContainer.put("result", new JSONArray(jResultList));
         return jResultContainer;
     }
     /**
@@ -255,9 +275,20 @@ public abstract class BaseGoogleRefineRe
         Collection<String> ids = new HashSet<String>();
         List<String> texts = new ArrayList<String>(); // keep order for texts
         Collection<Object> values = new HashSet<Object>();
-        for (Entry<String,Collection<Value>> property : rQuery.getProperties()) {
+        
+        //hold all references for @references special property
+        HashSet<String> references = new HashSet<String>();
+        //holds all texts for @fullText special property
+        List<String> fullText = new ArrayList<String>();
+        //holds the context for the @similarity special property
+        StringBuilder similarityContext = new StringBuilder();
+        //the field used for the @similarity special property
+        HashSet<String> similarityFields = new LinkedHashSet<String>();
+        
+        for (Entry<ReconcileProperty,Collection<ReconcileValue>> propertyEntry : rQuery.getProperties()) {
+            ReconcileProperty property = propertyEntry.getKey();
             // collect the properties
-            for (Value value : property.getValue()) {
+            for (ReconcileValue value : propertyEntry.getValue()) {
                 if (value.getId() != null) {
                     ids.add(value.getId());
                 }
@@ -267,43 +298,109 @@ public abstract class BaseGoogleRefineRe
                     values.add(value.getValue());
                 }
             }
-            // add the Constraint to the FieldQuery
-            // TODO: how to deal with values of different types
-            //  * currently References > Text > Datatype. First present value
-            //    is used
-            //  * non Reference | Text | Datatype values are ignored
-            if (!ids.isEmpty()) {
-                // only references -> create reference constraint
-                query.setConstraint(property.getKey(), new ReferenceConstraint(ids));
-                if (ids.size() != property.getValue().size()) {
-                    log.info("Only some of the parsed values of the field {} contain"
-                             + "references -> will ignore values with missing references");
-                }
-            } else if (!texts.isEmpty()) {
-                // NOTE: This will use OR over all texts. To enforce AND one
-                // would need to parse a single string with all values e.g. by
-                // using StringUtils.join(texts," ")
-                query.setConstraint(property.getKey(), new TextConstraint(texts));
-                if (ids.size() != property.getValue().size()) {
-                    log.info("Only some of the parsed values of the field {} are"
-                             + "of type String -> will ignore non-string values");
+            //handle supported special properties
+            if(property.isSpecial()){
+                if(property.getName().equalsIgnoreCase("references")){
+                    //Note that multiple "references" properties might be present
+                    //if Users do parse parameters - so we need to collect all values
+                    if(property.getParameter() != null){
+                        log.warn("parameters are not supported for @references -> ignore '{}'",property.getParameter());
+                    }
+                    if(ids.isEmpty()){
+                        log.warn("No URI values present for parsed @references property! (values: "
+                            +propertyEntry.getValue());
+                    }
+                    for(String id : ids){
+                        references.add(id);
+                    }
+                } else if(property.getName().equalsIgnoreCase("fulltext")){
+                    //Note that multiple "fullText" properties might be present
+                    //if Users do parse parameters - so we need to collect all values
+                    if(property.getParameter() != null){
+                        log.warn("parameters are not supported for @fullText -> ignore '{}'",property.getParameter());
+                    }
+                    for(String text : texts){ //add the values
+                        fullText.add(text);
+                    }
+                } else if(property.getName().equalsIgnoreCase("similarity")){
+                    similarityFields.add(property.getParameter() != null ? 
+                            NamespaceEnum.getFullName(property.getParameter()) :
+                                SpecialFieldEnum.fullText.getUri()); //the default
+                    for(String text : texts){ //Append the text values to the context
+                        similarityContext.append(text).append(' ');
+                    }
+                } else {
+                    //TODO: implement LDPATH support
+                    log.warn("ignore unsupported special property {}",property);
                 }
-            } else if(!values.isEmpty()){
-                query.setConstraint(property.getKey(), new ValueConstraint(values));
-            } //else no values ... ignore property
+            } else { //no special property
+                // add the Constraint to the FieldQuery
+                // TODO: how to deal with values of different types
+                //  * currently References > Text > Datatype. First present value
+                //    is used
+                //  * non Reference | Text | Datatype values are ignored
+                if (!ids.isEmpty()) {
+                    // only references -> create reference constraint
+                    query.setConstraint(property.getName(), new ReferenceConstraint(ids));
+                    if (ids.size() != propertyEntry.getValue().size()) {
+                        log.info("Only some of the parsed values of the field {} contain"
+                                 + "references -> will ignore values with missing references");
+                    }
+                } else if (!texts.isEmpty()) {
+                    // NOTE: This will use OR over all texts. To enforce AND one
+                    // would need to parse a single string with all values e.g. by
+                    // using StringUtils.join(texts," ")
+                    query.setConstraint(property.getName(), new TextConstraint(texts));
+                    if (ids.size() != propertyEntry.getValue().size()) {
+                        log.info("Only some of the parsed values of the field {} are"
+                                 + "of type String -> will ignore non-string values");
+                    }
+                } else if(!values.isEmpty()){
+                    query.setConstraint(property.getName(), new ValueConstraint(values));
+                } //else no values ... ignore property
+            }
             //clean up
             ids.clear();
             texts.clear();
             values.clear();
         }
+        //now add constraints for the collected special properties
+        if(!references.isEmpty()){ 
+            //add references constraint
+            ReferenceConstraint refConstraint = new ReferenceConstraint(references, MODE.all);
+            query.setConstraint(SpecialFieldEnum.references.getUri(), refConstraint);
+        }
+        if(!fullText.isEmpty()){
+            TextConstraint textConstraint = new TextConstraint(fullText);
+            query.setConstraint(SpecialFieldEnum.fullText.getUri(), textConstraint);
+            //add full text constraint
+        }
+        if(similarityContext.length() > 0 && !similarityFields.isEmpty()){
+            //add similarity constraint
+            Iterator<String> fieldIt = similarityFields.iterator();
+            String field = fieldIt.next();
+            SimilarityConstraint simConstraint;
+            if(fieldIt.hasNext()){
+                List<String> addFields = new ArrayList<String>(similarityFields.size()-1);
+                while(fieldIt.hasNext()){
+                    addFields.add(fieldIt.next());
+                }
+                simConstraint = new SimilarityConstraint(similarityContext.toString(),addFields);
+            } else {
+                simConstraint = new SimilarityConstraint(similarityContext.toString());
+            }
+            query.setConstraint(field, simConstraint);
+        }
     }
+    
+    
     /**
      * @param rQuery
      * @param query
      */
     private void addTypeConstraint(ReconcileQuery rQuery, FieldQuery query) {
         //maybe an other column was also mapped to the TYPE_FIELD property
-        Collection<Value> additionalTypes = rQuery.removeProperty(TYPE_FIELD);
+        Collection<ReconcileValue> additionalTypes = rQuery.removeProperty(TYPE_FIELD);
         Set<String> queryTypes = rQuery.getTypes();
         Set<String> types = null;
         if(additionalTypes == null){
@@ -315,7 +412,7 @@ public abstract class BaseGoogleRefineRe
             if(queryTypes != null){
                 types.add(rQuery.getQuery());
             }
-            for(Value value : additionalTypes){
+            for(ReconcileValue value : additionalTypes){
                 if(value != null){
                     if(value.getId() != null){
                         types.add(value.getId());
@@ -337,14 +434,14 @@ public abstract class BaseGoogleRefineRe
      */
     private void addNameConstraint(ReconcileQuery rQuery, FieldQuery query) {
         //maybe an other column was also mapped to the NAME_FIELD property
-        Collection<Value> additionalValues = rQuery.removeProperty(NAME_FIELD);
+        Collection<ReconcileValue> additionalValues = rQuery.removeProperty(NAME_FIELD);
         List<String> values;
         if(additionalValues == null){
             values = Collections.singletonList(rQuery.getQuery());
         } else {
             values = new ArrayList<String>(additionalValues.size()+1);
             values.add(rQuery.getQuery());
-            for(Value value : additionalValues){
+            for(ReconcileValue value : additionalValues){
                 if(value != null && value.getValue() instanceof String){
                     values.add((String)value.getValue());
                 }
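
A minimal sketch of the constraints the special property handling above finally adds to
the FieldQuery (obtaining the FieldQuery from the site's query factory is omitted; the
field URI and the example values are made up for illustration):

    import java.util.Arrays;

    import org.apache.stanbol.entityhub.servicesapi.defaults.SpecialFieldEnum;
    import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
    import org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint;
    import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;

    public class SpecialConstraintSketch {
        /** Adds constraints as created for '@fulltext' and '@similarity:{property}' values. */
        static void addSpecialConstraints(FieldQuery query) {
            // '@fulltext' values -> TextConstraint on the full text special field
            query.setConstraint(SpecialFieldEnum.fullText.getUri(),
                new TextConstraint(Arrays.asList("Neuromancer", "Count Zero")));
            // '@similarity:dbp-ont:abstract' -> SimilarityConstraint on the parsed field
            query.setConstraint("http://dbpedia.org/ontology/abstract",
                new SimilarityConstraint("science fiction cyberpunk novel"));
        }
    }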

Added: incubator/stanbol/trunk/entityhub/jersey/src/main/resources/org/apache/stanbol/entityhub/jersey/static/images/google_refine_reconciliation-similarityexample.png
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/jersey/src/main/resources/org/apache/stanbol/entityhub/jersey/static/images/google_refine_reconciliation-similarityexample.png?rev=1337141&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/stanbol/trunk/entityhub/jersey/src/main/resources/org/apache/stanbol/entityhub/jersey/static/images/google_refine_reconciliation-similarityexample.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: incubator/stanbol/trunk/entityhub/jersey/src/main/resources/org/apache/stanbol/entityhub/jersey/templates/org/apache/stanbol/entityhub/jersey/resource/reconcile/BaseGoogleRefineReconcileResource/index.ftl
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/jersey/src/main/resources/org/apache/stanbol/entityhub/jersey/templates/org/apache/stanbol/entityhub/jersey/resource/reconcile/BaseGoogleRefineReconcileResource/index.ftl?rev=1337141&r1=1337140&r2=1337141&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/jersey/src/main/resources/org/apache/stanbol/entityhub/jersey/templates/org/apache/stanbol/entityhub/jersey/resource/reconcile/BaseGoogleRefineReconcileResource/index.ftl (original)
+++ incubator/stanbol/trunk/entityhub/jersey/src/main/resources/org/apache/stanbol/entityhub/jersey/templates/org/apache/stanbol/entityhub/jersey/resource/reconcile/BaseGoogleRefineReconcileResource/index.ftl Fri May 11 13:13:42 2012
@@ -31,16 +31,15 @@ Entityhub.</p>
 <#-- START Collapseable -->
 <div class="docu"> 
     <div class="collapsed">
-        <h3 id="reconcileDocTitle" class="docuTitle">
-            Installation/Usage:</h3>
+        <h3 id="grefeineinstall" class="docuTitle">
+            Google Refine Installation:</h3>
         <script>
-            $("#reconcileDocTitle").click(function () {
-              $("#reconcileDocTitle").parent().toggleClass("collapsed");
+            $("#grefeineinstall").click(function () {
+              $("#grefeineinstall").parent().toggleClass("collapsed");
             }); 
         </script>
         <div class="docuCollapsable">
 
-<h3>Google Refine Installation</h3>
 Users that want to use this service need first to 
 <a href="http://code.google.com/p/google-refine/wiki/InstallationInstructions">
 install Google Refine</a>. Typically Stanbol users will also be interested in
@@ -48,12 +47,30 @@ installing the 
 <a href="http://lab.linkeddata.deri.ie/2010/grefine-rdf-extension/"> RDF
 Extension</a> as this will allow you to map you data to RDF schemata and export
 them as RDF (e.g. to import them afterwards to the Stanbol Entityhub.)
-</p><p>
-<h3>Reconciliation Service Installation</h3>
-After installing Google Refine you will need to create a first project. For the
-sake of testing Reconciliation with the Stanbol Entityhub you can use the
+</p>
+<#-- END Collapseable -->
+        </div>
+    </div>
+</div>  
+
+<#-- START Collapseable -->
+<div class="docu"> 
+    <div class="collapsed">
+        <h3 id="reconcileservieconfig" class="docuTitle">
+            Configuring the Reconciliation Service</h3>
+        <script>
+            $("#reconcileservieconfig").click(function () {
+              $("#reconcileservieconfig").parent().toggleClass("collapsed");
+            }); 
+        </script>
+        <div class="docuCollapsable">
+
+<p>
+To configure a reconciliation service you first need to create a new (or open
+an existing) Google Refine Project. If you do not yet have a project 
+you can use the 
 '<a href="http://svn.apache.org/repos/asf/lucene/dev/tags/lucene_solr_3_6_0/solr/example/exampledocs/books.csv">
-book.scv</a>' file included in the Apache Solr distribution.
+books.csv</a>' file included in the Apache Solr distribution to create a new one.
 </p><p>
 If you created a new Google Refine Project (e.g. by using the 'books.csv' example)
 you will see the imported data in tabular form. The following Screenshot
@@ -62,17 +79,24 @@ visualises how to open the Reconciliatio
 <p> Via the Reconciliation dialog you can now "install" the Entityhub, Referenced
 Sites or the '/sites' endpoint as <b>Standard Reconciliation Service</b> by
 by pressing the [Add Standard Service ...] Button add copying the URL of 
-this page to the dialog. The following Screenshot shows how the install the
+this page to the dialog. 
+</p><p>
+Service URL:
+<code><pre>
+    ${it.requestUri}
+</pre></code>
+</p><p>
+The following Screenshot shows how to install the
 Referenced Site for DBpedia.org.</p>
 <img src="${it.staticRootUrl}/entityhub/images/google_refine_reconciliation-add_service.png"/>
-<h3>Reconciliation Service Usage</h3>
+<h4>Testing the Service</h4>
 <p>After this step a new Reconciliation Service will show up in the left link.
 In addition the newly installed site will be selected and used to provide 
 suggestions for the initially selected column of you Google Refine project
 (Book Authors if you used the 'book.csv' sample data and selected the 'author_t'
 column). 
 </p><p>
-The final Screenshot shows the installed Reconciliation service based on the
+The next Screenshot shows the installed Reconciliation service based on the
 <b>Stanbol Entityhub: dbpedia Referenced Site</b> that is ready to be used
 to reconcile Entities.</p>
 <img src="${it.staticRootUrl}/entityhub/images/google_refine_reconciliation-use_service.png"/>
@@ -82,6 +106,84 @@ to reconcile Entities.</p>
     </div>
 </div>  
 
+<#-- START Collapseable -->
+<div class="docu"> 
+    <div class="collapsed">
+        <h3 id="reconcileserviceusage" class="docuTitle">
+            Usage of the Reconciliation Service</h3>
+        <script>
+            $("#reconcileserviceusage").click(function () {
+              $("#reconcileserviceusage").parent().toggleClass("collapsed");
+            }); 
+        </script>
+        <div class="docuCollapsable">
+<p>
+This section first provides an overview of the usage of the Google Refine Reconciliation
+dialog and then documents the special features provided by this
+implementation.
+</p>
+<h4>Reconciliation Dialog</h4>
+<img src="${it.staticRootUrl}/entityhub/images/google_refine_reconciliation-use_service.png"/>
+<p>Reconciliation Dialog Fields</p><ul>
+<li><b>Reconcile Services:</b> On the left side the list of available services is shown.
+As soon as you select one, Google Refine will send a query for the first ten entries of
+your current project to that service to obtain some metadata.</li>
+<li><b>Suggested Types:</b> In the middle a list of suggested types is presented.
+This list will be empty if the service does not return any results for the request
+for the first ten entries. You can also manually add the type in the field below
+the list. It is also possible to reconcile without constraining the type by
+selecting the last option.</li>
+<li><b>Using additional Properties:</b> On the right side the list of all 
+columns of your project is shown. Information from those columns can be used
+for reconciliation. To use values of other columns the name of the property
+must be specified in the text field next to the column name. The Stanbol
+Entityhub also supports some special options like the semantic context-, full
+text- and similarity-search (see below for details). <br>
+Note that it is possible to use the same property (and special fields) for
+mapping several columns. In this case values of all those columns are merged.</li>
+</ul>
+<p>
+The Entityhub does support qnames (e.g. rdfs:label) for prefixes registered in
+the <a href="http://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java">
+NamespaceEnum</a>.</p>
+<h4>Special Property support</h4>
+<p>
+The Reconciliation Dialog allows values of other columns to be used to improve
+reconciliation results. To further improve this ability the Stanbol Entityhub
+supports the following special fields:
+</p><ul>
+<li><b>Full Text</b> '<code>@fullText</code>': This allows textual values
+of other fields to be matched against any textual value that is linked with
+suggested Entities (e.g. the values of rdfs:comment, skos:note, dbp-ont:abstract,
+...).</li>
+<li><b>Semantic Context</b> '<code>@references</code>': This allows the URI
+values of other columns (that are already reconciled) to be matched with suggested
+Entities. This is very useful to link further columns of a project if you have
+already reconciled (and possibly manually corrected/improved) another column
+of the project. Note that this requires the dataset to define those links.</li>
+<li><b>Similarity Search</b> '<code>@similarity</code>': This will use textual
+values to rank returned values based on their similarity (using 
+<a href="http://wiki.apache.org/solr/MoreLikeThis">Solr MoreLikeThis</a>).<br>
+By default this uses the full text field; however, users can change this
+by explicitly parsing a {property} URI (or qname)
+'<code>@similarity:{property}</code>' as parameter. Note that parsed fields
+need to be correctly configured to support Solr MLT queries. The documentation
+of the Apache Entityhub Indexing Tool provides more information on that.</li>
+</ul>
+<p>
+The following example shows how to use '<code>@similarity</code>' for 
+disambiguating music artists based on the name of the track and the album. To
+make this work the <a href="">Musicbrainz</a> data was imported into the Entityhub in
+a way that the labels of Albums and Tracks were indexed with the Artists.
+</p>
+<img src="${it.staticRootUrl}/entityhub/images/google_refine_reconciliation-similarityexample.png"/>
+
+<#-- END Collapseable -->
+        </div>
+    </div>
+</div>  
+
+
 </div>
 
 <div class="panel" id="restapi" style="display: none;">

Modified: incubator/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LangEncoder.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LangEncoder.java?rev=1337141&r1=1337140&r2=1337141&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LangEncoder.java (original)
+++ incubator/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LangEncoder.java Fri May 11 13:13:42 2012
@@ -21,7 +21,10 @@ import java.util.Collection;
 import java.util.Collections;
 
 import org.apache.stanbol.commons.solr.utils.SolrUtil;
+import org.apache.stanbol.entityhub.servicesapi.defaults.DataTypeEnum;
+import org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum;
 import org.apache.stanbol.entityhub.yard.solr.model.FieldMapper;
+import org.apache.stanbol.entityhub.yard.solr.model.IndexDataType;
 import org.apache.stanbol.entityhub.yard.solr.model.IndexField;
 import org.apache.stanbol.entityhub.yard.solr.query.ConstraintTypePosition;
 import org.apache.stanbol.entityhub.yard.solr.query.ConstraintTypePosition.PositionType;
@@ -52,15 +55,17 @@ public class LangEncoder implements Inde
         } else {
             languages = value.getLanguages();
         }
-        if (!languages.isEmpty()) {
-            for (String prefix : fieldMapper.encodeLanguages(value)) {
-                constraint.addEncoded(PREFIX, SolrUtil.escapeSolrSpecialChars(prefix));
+        if(value.getDataType().equals(IndexDataTypeEnum.TXT.getIndexType())){
+            if (!languages.isEmpty()) {
+                for (String prefix : fieldMapper.encodeLanguages(value)) {
+                    constraint.addEncoded(PREFIX, SolrUtil.escapeSolrSpecialChars(prefix));
+                }
+            } else { // default
+                // search in the language merger field of the default language
+                constraint.addEncoded(PREFIX,
+                    SolrUtil.escapeSolrSpecialChars(fieldMapper.getLanguageMergerField(null)));
             }
-        } else { // default
-            // search in the language merger field of the default language
-            constraint.addEncoded(PREFIX,
-                SolrUtil.escapeSolrSpecialChars(fieldMapper.getLanguageMergerField(null)));
-        }
+        } //else no Text field -> do not add language prefixes
     }
 
     @Override

Modified: incubator/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/query/QueryUtils.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/query/QueryUtils.java?rev=1337141&r1=1337140&r2=1337141&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/query/QueryUtils.java (original)
+++ incubator/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/query/QueryUtils.java Fri May 11 13:13:42 2012
@@ -16,12 +16,20 @@
  */
 package org.apache.stanbol.entityhub.yard.solr.query;
 
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.solr.analysis.ICUTokenizerFactory;
+import org.apache.solr.analysis.TokenizerFactory;
 import org.apache.stanbol.commons.solr.utils.SolrUtil;
 import org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum;
 import org.apache.stanbol.entityhub.yard.solr.model.IndexValue;
@@ -29,7 +37,20 @@ import org.apache.stanbol.entityhub.yard
 
 public final class QueryUtils {
     private QueryUtils() {}
-
+    /**
+     * The {@link TokenizerFactory} used to create Tokens for parsed 
+     * {@link IndexValue#getValue()} in case <code>false</code> is parsed for
+     * the tokenize property of {@link #encodeQueryValue(IndexValue, boolean)}.
+     * <p>
+     * Currently the {@link ICUTokenizerFactory} is used for Tokenizing.
+     */
+    private final static TokenizerFactory tokenizerFactory = new ICUTokenizerFactory();
+    /**
+     * Regex pattern that searches for Wildcard chars '*' and '?' excluding
+     * escaped versions '\*' and '\?'
+     */
+    private final static Pattern wILDCARD_QUERY_CHAR_PATTERN = Pattern.compile("[^\\\\][\\*\\?]");
+    
     /**
      * This method encodes a parsed index value as needed for queries.
      * <p>
@@ -74,18 +95,27 @@ public final class QueryUtils {
             value = SolrUtil.escapeWildCardString(value);
         }
         if (IndexDataTypeEnum.TXT.getIndexType().equals(indexValue.getType())) {
-            if(!escape){ 
-                value = value.toLowerCase();
-            } //rw: 20120314: respect case sensitivity for escaped (non wildcard)
-            Collection<String> tokens = new HashSet<String>(
-                    Arrays.asList(value.split(" ")));
-            tokens.remove("");
-            queryConstraints = tokens.toArray(new String[tokens.size()]);
+            if(escape) { 
+                //value does not contain '*' and '?' as they would be escaped.
+                queryConstraints = new String[] { value.indexOf(' ')>=0 ?
+                        '"'+value+'"' : value
+                };
+            } else { //non escaped strings might contain wildcard chars '*', '?'
+                //those need to be treated specially (STANBOL-607)
+                //Change to 2nd param to false after switching to Solr 3.6+ (see SOLR-2438)
+                queryConstraints = parseWildcardQueryTerms(value, true);
+            }
         } else if (IndexDataTypeEnum.STR.getIndexType().equals(indexValue.getType())) {
-            if(!escape){ 
-                value = value.toLowerCase();
-            } //rw: 20120314: respect case sensitivity for escaped (non wildcard)
-            queryConstraints = new String[] {value.replace(' ', '+')};
+            if(escape){ 
+                 //rw: 20120314: respect case sensitivity for escaped (non wildcard)
+                queryConstraints = new String[] { value.indexOf(' ')>=0 ?
+                        '"'+value+'"' : value
+                };
+            } else { //non escaped strings might contain wildcard chars '*', '?'
+                //those need to be treated specially (STANBOL-607)
+                //Change the 2nd param to false after switching to Solr 3.6+ (see SOLR-2438)
+                queryConstraints = parseWildcardQueryTerms(value, true);
+            }
         } else {
             queryConstraints = new String[] {value};
         }
@@ -125,4 +155,104 @@ public final class QueryUtils {
         }
         return indexValues;
     }
+    
+    public static void main(String[] args) throws IOException {
+        String value = "This is a te?t for multi* Toke? Wildc\\*adrd Se?rche*";
+        System.out.println(Arrays.toString(parseWildcardQueryTerms(value,true)));
+    }
+
+    /**
+     * Parses query terms for Wildcard queries as described in the first
+     * comment of STANBOL-607. <p>
+     * As an example the String:
+     * <code><pre>
+     *     "This is a te?t for multi* Toke? Wildc\*adrd Se?rche*"
+     * </pre></code>
+     * is converted into the query terms
+     * <code><pre>
+     *     ["This is a","te?t","multi*","toke?","Wildc\*adrd","se?rche*"]
+     * </pre></code>
+     * NOTE: tokens that include a wildcard are converted to lower case
+     * @param value the value
+     * @param lowercaseWildcardTokens if query elements that include a wildcard
+     * should be converted to lower case.
+     * @return the query terms
+     * @throws IllegalStateException in the (unexpected) case the internally used {@link StringReader} throws an {@link IOException}
+     */
+    private static String[] parseWildcardQueryTerms(String value,boolean lowercaseWildcardTokens) {
+        //This assumes that the Tokenizer does tokenize (split on) '*' and '?',
+        //which makes it a little bit tricky.
+        Tokenizer tokenizer = tokenizerFactory.create(new StringReader(value));
+        Matcher m = WILDCARD_QUERY_CHAR_PATTERN.matcher(value);
+        int next = m.find()?m.start()+1:-1;
+        if(next < 0){ //No wildcard
+            return new String[]{value};
+        } 
+        ArrayList<String> queryElements = new ArrayList<String>(5);
+        int lastAdded = -1;
+        int lastOffset = 0;
+        boolean foundWildcard = false;
+        //Lucene tokenizer are really low level ...
+        try {
+            while(tokenizer.incrementToken()){
+                //only interested in the start/end indexes of tokens
+                OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
+                if(lastAdded < 0){ //start a new query element with this token
+                    lastAdded = offset.startOffset();
+                }
+                if(foundWildcard){ //wildcard present in the current token
+                    //two cases: "wildcar? at the end", "wild?ard within the word"
+                    // (1) [wildcar,at,the,end] : In this case this is called with
+                    //      'at' as active Token and we need to write "wildcar?" as
+                    //      query term
+                    // (2) [wild,ard,within,the,word]: In this case this is called with
+                    //      'ard' as active Token and we need to write "wild?ard" as
+                    //      query term.
+                    if(offset.startOffset() > lastOffset+1) {//(1)
+                        String queryElement = value.substring(lastAdded,lastOffset+1);
+                        if(lowercaseWildcardTokens){
+                            queryElement = queryElement.toLowerCase();
+                        }
+                        queryElements.add(queryElement);
+                        lastAdded = offset.startOffset(); //previous token consumed
+                        //set to the start of the current token
+                        foundWildcard = false;
+                    } else if(next != offset.endOffset()){ //(2)
+                        String queryElement = value.substring(lastAdded,offset.endOffset());
+                        if(lowercaseWildcardTokens){
+                            queryElement = queryElement.toLowerCase();
+                        }
+                        queryElements.add(queryElement);
+                        lastAdded = -1; //consume the current token
+                        foundWildcard = false;
+                    }
+                }
+                if(next == offset.endOffset()){ //a '*' or '?' directly follows the current token
+                    next = m.find()?m.start()+1:-1; //search next '*', '?' in value
+                    //we need to write all tokens previous to the current (if any)
+                    //NOTE: ignore if foundWildcard is TRUE (multiple wildcards in
+                    //      a single word)
+                    if(!foundWildcard && lastAdded<lastOffset){
+                        queryElements.add(value.substring(lastAdded,lastOffset));
+                        lastAdded = offset.startOffset();
+                    }//else multiple wildcards in a single token
+                    foundWildcard = true;
+                }
+                lastOffset = offset.endOffset();
+            }
+        } catch (IOException e) {
+            //StringReader can not throw IOExceptions
+            throw new IllegalStateException(e);
+        }
+        if(lastAdded >= 0 && lastAdded < value.length()){
+            String queryElement = value.substring(lastAdded,value.length());
+            if(foundWildcard && lowercaseWildcardTokens){
+                queryElement = queryElement.toLowerCase();
+            }
+            queryElements.add(queryElement);
+        }
+        return queryElements.toArray(new String[queryElements.size()]);
+    }
+
+
 }
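
As a minimal sketch of the changed behaviour (reusing the variable names from the
diff above; not a verbatim excerpt of the committed method): for escaped TXT/STR
values the former split at spaces is replaced by a single phrase term, while non
escaped values are handed to the new wildcard aware term parsing.

    // escaped case: the value was already passed through SolrUtil.escapeWildCardString(..)
    String value = "Frankfurt am Main";
    String[] queryConstraints = new String[] {
        value.indexOf(' ') >= 0 ? '"' + value + '"' : value
    };
    // -> ["\"Frankfurt am Main\""] : one phrase query term instead of three tokens

    // non escaped case: wildcards need to be preserved (STANBOL-607)
    // queryConstraints = parseWildcardQueryTerms(value, true);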
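
The new parseWildcardQueryTerms(String,boolean) helper is private; the sketch
below only restates the conversion documented in its Javadoc and assumes it is
run from within QueryUtils (as the temporary main(..) method in the diff does):

    String value = "This is a te?t for multi* Toke? Wildc\\*adrd Se?rche*";
    // expected query terms according to the Javadoc:
    //   ["This is a","te?t","multi*","toke?","Wildc\*adrd","se?rche*"]
    // wildcard tokens are lower cased (2nd param == true); the escaped '\*' in
    // "Wildc\*adrd" is not treated as a wildcard and keeps its case
    System.out.println(Arrays.toString(parseWildcardQueryTerms(value, true)));

Passing true as the second parameter presumably works around wildcard terms not
being analysed (and therefore not lower cased) by Solr versions before 3.6; the
SOLR-2438 references in the diff mark the places to change once Solr 3.6+ is used.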

Modified: incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/entityhub/it/query/DbpediaQueryTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/entityhub/it/query/DbpediaQueryTest.java?rev=1337141&r1=1337140&r2=1337141&view=diff
==============================================================================
--- incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/entityhub/it/query/DbpediaQueryTest.java (original)
+++ incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/entityhub/it/query/DbpediaQueryTest.java Fri May 11 13:13:42 2012
@@ -293,6 +293,32 @@ public abstract class DbpediaQueryTest e
         executeQuery(test);  
     }
     @Test
+    public void testMultiWordWildcardTextConstraints() throws IOException, JSONException {
+        //this specifically covers the issue described in the first comment of
+        //STANBOL-607
+        FieldQueryTestCase test = new FieldQueryTestCase(
+            "{ "+
+                "'selected': ["+
+                    "'http:\\/\\/www.w3.org\\/2000\\/01\\/rdf-schema#label'],"+
+                "'offset': '0',"+
+                "'limit': '3',"+
+                "'constraints': [{ "+
+                    "'type': 'text', "+
+                    "'language': 'de', "+
+                    "'patternType': 'wildcard', "+
+                    "'text': 'Frankf* am Main', "+
+                    "'field': 'http:\\/\\/www.w3.org\\/2000\\/01\\/rdf-schema#label' "+
+                "}]"+
+             "}",
+             Arrays.asList( //list of expected results
+                 "http://dbpedia.org/resource/Frankfurt"),
+             Arrays.asList( //list of required fields for results
+                "http://www.w3.org/2000/01/rdf-schema#label"));
+        //now execute the test
+        executeQuery(test);  
+    }
+    
+    @Test
     public void testFieldQueryValueConstraints() throws IOException, JSONException {
         FieldQueryTestCase test = new FieldQueryTestCase(
             "{ "+