You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by gs...@apache.org on 2009/11/23 18:18:17 UTC

svn commit: r883421 - in /lucene/solr/trunk: CHANGES.txt src/java/org/apache/solr/search/ValueSourceParser.java src/java/org/apache/solr/search/function/distance/StringDistanceFunction.java src/test/org/apache/solr/search/function/TestFunctionQuery.java

Author: gsingers
Date: Mon Nov 23 17:18:16 2009
New Revision: 883421

URL: http://svn.apache.org/viewvc?rev=883421&view=rev
Log:
SOLR-1302: Why should numbers get to have all the fun, add String distance functions, too

Added:
    lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/StringDistanceFunction.java   (with props)
Modified:
    lucene/solr/trunk/CHANGES.txt
    lucene/solr/trunk/src/java/org/apache/solr/search/ValueSourceParser.java
    lucene/solr/trunk/src/test/org/apache/solr/search/function/TestFunctionQuery.java

Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=883421&r1=883420&r2=883421&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Mon Nov 23 17:18:16 2009
@@ -34,7 +34,7 @@
 New Features
 ----------------------
 
-* SOLR-1302: Added several new distance based functions, including Great Circle (haversine), Manhattan and Euclidean.
+* SOLR-1302: Added several new distance based functions, including Great Circle (haversine), Manhattan, Euclidean and String (using the StringDistance methods in the Lucene Spellchecker).
   Also added geohash(), deg() and rad() convenience functions. See http://wiki.apache.org/solr/FunctionQuery. (gsingers)
 
 * SOLR-1553: New dismax parser implementation (accessible as "edismax")

Modified: lucene/solr/trunk/src/java/org/apache/solr/search/ValueSourceParser.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/ValueSourceParser.java?rev=883421&r1=883420&r2=883421&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/ValueSourceParser.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/ValueSourceParser.java Mon Nov 23 17:18:16 2009
@@ -20,6 +20,10 @@
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.spell.JaroWinklerDistance;
+import org.apache.lucene.search.spell.LevensteinDistance;
+import org.apache.lucene.search.spell.NGramDistance;
+import org.apache.lucene.search.spell.StringDistance;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.schema.DateField;
@@ -34,6 +38,7 @@
 import org.apache.solr.search.function.distance.VectorDistanceFunction;
 import org.apache.solr.search.function.distance.GeohashHaversineFunction;
 import org.apache.solr.search.function.distance.GeohashFunction;
+import org.apache.solr.search.function.distance.StringDistanceFunction;
 import org.apache.solr.util.plugin.NamedListInitializedPlugin;
 
 import java.io.IOException;
@@ -255,6 +260,30 @@
         return new GeohashFunction(lat, lon);
       }
     });
+    addParser("strdist", new ValueSourceParser() {
+      public ValueSource parse(FunctionQParser fp) throws ParseException {
+        // Builds a StringDistanceFunction from: two value sources, a measure name, and (ngram only) an optional size.
+        ValueSource str1 = fp.parseValueSource();
+        ValueSource str2 = fp.parseValueSource();
+        String distClass = fp.parseArg(); // NOTE(review): dereferenced below without a null check - confirm parseArg() cannot return null when the argument is omitted
+
+        StringDistance dist = null;
+        if (distClass.equalsIgnoreCase("jw")) { // built-in names are matched case-insensitively
+          dist = new JaroWinklerDistance();
+        } else if (distClass.equalsIgnoreCase("edit")) {
+          dist = new LevensteinDistance();
+        } else if (distClass.equalsIgnoreCase("ngram")) {
+          int ngram = 2; // size used when no further argument follows the measure name
+          if (fp.hasMoreArguments()) {
+            ngram = fp.parseInt();
+          }
+          dist = new NGramDistance(ngram);
+        } else {
+          dist = (StringDistance) fp.req.getCore().getResourceLoader().newInstance(distClass); // any other name is instantiated via the core's resource loader; the cast is unchecked
+        }
+        return new StringDistanceFunction(str1, str2, dist);
+      }
+    });
 
     addParser(new DoubleParser("rad") {
       public double func(int doc, DocValues vals) {

Added: lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/StringDistanceFunction.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/StringDistanceFunction.java?rev=883421&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/StringDistanceFunction.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/StringDistanceFunction.java Mon Nov 23 17:18:16 2009
@@ -0,0 +1,96 @@
+package org.apache.solr.search.function.distance;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.spell.StringDistance;
+import org.apache.solr.search.function.DocValues;
+import org.apache.solr.search.function.ValueSource;
+
+import java.io.IOException;
+import java.util.Map;
+
+
+/**
+ * ValueSource whose per-document value is the result of applying a {@link StringDistance}
+ * measure to the string values of two other ValueSources.
+ **/
+public class StringDistanceFunction extends ValueSource {
+  protected ValueSource str1, str2;
+  protected StringDistance dist;
+
+  /**
+   * @param str1 source of the first string handed to the measure
+   * @param str2 source of the second string handed to the measure
+   * @param measure the {@link StringDistance} applied to the two strings
+   */
+  public StringDistanceFunction(ValueSource str1, ValueSource str2, StringDistance measure) {
+    this.str1 = str1;
+    this.str2 = str2;
+    dist = measure;
+
+
+  }
+
+  @Override
+  public DocValues getValues(Map context, IndexReader reader) throws IOException {
+    final DocValues str1DV = str1.getValues(context, reader);
+    final DocValues str2DV = str2.getValues(context, reader);
+    return new DocValues() {
+      // floatVal does the actual computation; the other numeric accessors are derived from it by casting.
+      public float floatVal(int doc) {
+        return (float) dist.getDistance(str1DV.strVal(doc), str2DV.strVal(doc));
+      }
+
+      public int intVal(int doc) {
+        return (int) doubleVal(doc);
+      }
+
+      public long longVal(int doc) {
+        return (long) doubleVal(doc);
+      }
+
+      public double doubleVal(int doc) {
+        return (double) floatVal(doc);
+      }
+
+      @Override
+      public String toString(int doc) {
+        StringBuilder sb = new StringBuilder();
+        sb.append("strdist").append('(');
+        sb.append(str1DV.toString(doc)).append(',').append(str2DV.toString(doc))
+                .append(", dist=").append(dist.getClass().getName());
+        sb.append(')');
+        return sb.toString();
+      }
+    };
+  }
+
+  public String description() { // renders as strdist(<str1>,<str2>, dist=<measure class name>)
+    StringBuilder sb = new StringBuilder();
+    sb.append("strdist").append('(');
+    sb.append(str1).append(',').append(str2).append(", dist=").append(dist.getClass().getName());
+    sb.append(')');
+    return sb.toString();
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (!(o instanceof StringDistanceFunction)) return false;
+
+    StringDistanceFunction that = (StringDistanceFunction) o;
+    // NOTE(review): equality (and hashCode below) delegate to the StringDistance instance - confirm its equals()/hashCode() are value-based.
+    if (!dist.equals(that.dist)) return false;
+    if (!str1.equals(that.str1)) return false;
+    if (!str2.equals(that.str2)) return false;
+
+    return true;
+  }
+
+  @Override
+  public int hashCode() {
+    int result = str1.hashCode();
+    result = 31 * result + str2.hashCode();
+    result = 31 * result + dist.hashCode();
+    return result;
+  }
+}

Propchange: lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/StringDistanceFunction.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/solr/trunk/src/test/org/apache/solr/search/function/TestFunctionQuery.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/search/function/TestFunctionQuery.java?rev=883421&r1=883420&r2=883421&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/search/function/TestFunctionQuery.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/search/function/TestFunctionQuery.java Mon Nov 23 17:18:16 2009
@@ -347,6 +347,14 @@
     assertQ(req("fl", "*,score", "q", "{!func}deg(y_td)", "fq", "id:3"), "//float[@name='score']='45.0'");
   }
 
+  public void testStrDistance() throws Exception { // indexes x_s="foil" and checks the strdist score against 'foit' for the edit, jw and ngram (size 2) measures
+    assertU(adoc("id", "1", "x_s", "foil"));
+    assertU(commit());
+    assertQ(req("fl", "*,score", "q", "{!func}strdist(x_s, 'foit', edit)", "fq", "id:1"), "//float[@name='score']='0.75'");
+    assertQ(req("fl", "*,score", "q", "{!func}strdist(x_s, 'foit', jw)", "fq", "id:1"), "//float[@name='score']='0.8833333'");
+    assertQ(req("fl", "*,score", "q", "{!func}strdist(x_s, 'foit', ngram, 2)", "fq", "id:1"), "//float[@name='score']='0.875'");
+  }
+
   public void dofunc(String func, double val) throws Exception {
     // String sval = Double.toString(val);
     String sval = Float.toString((float)val);