You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2006/07/05 21:36:09 UTC

svn commit: r419321 - in /incubator/solr/trunk: CHANGES.txt src/java/org/apache/solr/analysis/WordDelimiterFilter.java src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java src/test/test-files/solr/conf/schema.xml

Author: yonik
Date: Wed Jul  5 12:36:08 2006
New Revision: 419321

URL: http://svn.apache.org/viewvc?rev=419321&view=rev
Log:
WordDelimiterFilter could lose position info

Added:
    incubator/solr/trunk/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java
Modified:
    incubator/solr/trunk/CHANGES.txt
    incubator/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java
    incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml

Modified: incubator/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/CHANGES.txt?rev=419321&r1=419320&r2=419321&view=diff
==============================================================================
--- incubator/solr/trunk/CHANGES.txt (original)
+++ incubator/solr/trunk/CHANGES.txt Wed Jul  5 12:36:08 2006
@@ -43,6 +43,7 @@
  2. Added escaping of attribute values in the XML response (Erik Hatcher)
  3. Added empty extractTerms() to FunctionQuery to enable use in
     a MultiSearcher (Yonik)
+ 4. WordDelimiterFilter sometimes lost token positionIncrement information
 
 Other Changes
  1. Upgrade to Lucene 2.0 nightly build 2006-06-22, lucene SVN revision 416224,

Modified: incubator/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java?rev=419321&r1=419320&r2=419321&view=diff
==============================================================================
--- incubator/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java (original)
+++ incubator/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java Wed Jul  5 12:36:08 2006
@@ -123,10 +123,6 @@
     }
   }
 
-  private int charType(String s, int pos) {
-    return charType(s.charAt(pos));
-  }
-
   // use the type of the first char as the type
   // of the token.
   private int tokType(Token t) {
@@ -170,16 +166,18 @@
     // Would it actually be faster to check for the common form
     // of isLetter() isLower()*, and then backtrack if it doesn't match?
 
+    int origPosIncrement;
     while(true) {
       Token t = input.next();
       if (t == null) return null;
 
       String s = t.termText();
-      int off=t.startOffset();
       int start=0;
       int end=s.length();
       if (end==0) continue;
 
+      origPosIncrement = t.getPositionIncrement();
+
       // Avoid calling charType more than once for each char (basically
       // avoid any backtracking).
       // makes code slightly more difficult, but faster.
@@ -273,6 +271,7 @@
             // optimization... if this is the only token,
             // return it immediately.
             if (queue.size()==0) {
+              newtok.setPositionIncrement(origPosIncrement);
               return newtok;
             }
 
@@ -376,7 +375,9 @@
     // System.out.println("##########AFTER COMBINATIONS:"+ str(queue));
 
     queuePos=1;
-    return queue.get(0);
+    Token tok = queue.get(0);
+    tok.setPositionIncrement(origPosIncrement);
+    return tok;
   }
 
 
@@ -416,29 +417,10 @@
     }
   }
 
-  private String str(List<Token> lst) {
-    StringBuilder sb = new StringBuilder();
-    sb.append('{');
-    for (Token t : lst) {
-      sb.append('(');
-      sb.append('"');
-      sb.append(t.termText());
-      sb.append("\",increment=");
-      sb.append(Integer.toString(t.getPositionIncrement()));
-      sb.append(')');
-
-      sb.append(',');
-    }
-    sb.append('}');
-    return sb.toString();
-  }
-
-
 
   // questions:
   // negative numbers?  -42 indexed as just 42?
   // dollar sign?  $42
   // percent sign?  33%
   // downsides:  if source text is "powershot" then a query of "PowerShot" won't match!
-
 }

Added: incubator/solr/trunk/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java?rev=419321&view=auto
==============================================================================
--- incubator/solr/trunk/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java (added)
+++ incubator/solr/trunk/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java Wed Jul  5 12:36:08 2006
@@ -0,0 +1,49 @@
+package org.apache.solr.analysis;
+
+import org.apache.solr.util.AbstractSolrTestCase;
+import org.apache.solr.util.TestHarness;
+import org.apache.solr.request.SolrQueryRequest;
+
+/**
+ * New WordDelimiterFilter tests... most of the tests are in ConvertedLegacyTest
+ */
+public class TestWordDelimiterFilter extends AbstractSolrTestCase {
+  public String getSchemaFile() { return "solr/conf/schema.xml"; }
+  public String getSolrConfigFile() { return "solr/conf/solrconfig.xml"; }
+
+
+  public void posTst(String v1, String v2, String s1, String s2) {
+    assertU(adoc("id",  "42",
+                 "subword", v1,
+                 "subword", v2));
+    assertU(commit());
+
+    // there is a positionIncrementGap of 100 between field values, so
+    // we test if that was maintained.
+    assertQ("position increment lost",
+            req("+id:42 +subword:\"" + s1 + ' ' + s2 + "\"~90")
+            ,"//result[@numFound=0]"
+    );
+    assertQ("position increment lost",
+            req("+id:42 +subword:\"" + s1 + ' ' + s2 + "\"~110")
+            ,"//result[@numFound=1]"
+    );
+  }
+
+
+  public void testRetainPositionIncrement() {
+    posTst("foo","bar","foo","bar");
+    posTst("-foo-","-bar-","foo","bar");
+    posTst("foo","bar","-foo-","-bar-");
+
+    posTst("123","456","123","456");
+    posTst("/123/","/456/","123","456");
+
+    posTst("/123/abc","qwe/456/","abc","qwe");
+
+    posTst("zoo-foo","bar-baz","foo","bar");
+    posTst("zoo-foo-123","456-bar-baz","foo","bar");
+  }
+
+
+}

Modified: incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml?rev=419321&r1=419320&r2=419321&view=diff
==============================================================================
--- incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml (original)
+++ incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml Wed Jul  5 12:36:08 2006
@@ -174,7 +174,7 @@
       </analyzer>
     </fieldtype>
 
-    <fieldtype name="subword" class="solr.TextField">
+    <fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
       <analyzer type="index">
           <tokenizer class="solr.WhitespaceTokenizerFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>