Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2007/03/02 00:15:22 UTC

svn commit: r513517 - in /lucene/solr/trunk: CHANGES.txt src/java/org/apache/solr/analysis/SynonymFilter.java src/test/org/apache/solr/analysis/TestSynonymFilter.java

Author: yonik
Date: Thu Mar  1 13:36:36 2007
New Revision: 513517

URL: http://svn.apache.org/viewvc?view=rev&rev=513517
Log:
fix SynonymFilter offsets: SOLR-167

Modified:
    lucene/solr/trunk/CHANGES.txt
    lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java
    lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java

Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?view=diff&rev=513517&r1=513516&r2=513517
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Thu Mar  1 13:36:36 2007
@@ -175,6 +175,9 @@
 
  7. SOLR-168: Fix display positioning of multiple tokens at the same
     position in analysis.jsp (yonik)
+
+ 8. SOLR-167: The SynonymFilter sometimes generated incorrect offsets when
+    multi-token synonyms were matched in the source text. (yonik)
  
 Other Changes
  1. Updated to Lucene 2.1

Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java?view=diff&rev=513517&r1=513516&r2=513517
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java Thu Mar  1 13:36:36 2007
@@ -86,8 +86,7 @@
 
       // OK, we matched a token, so find the longest match.
 
-      // since matched is only used for matches >= 2, defer creation until now
-      if (matched==null) matched=new LinkedList();
+      matched = new LinkedList();
 
       SynonymMap result = match((SynonymMap)o);
 

Modified: lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java?view=diff&rev=513517&r1=513516&r2=513517
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java Thu Mar  1 13:36:36 2007
@@ -43,6 +43,7 @@
    * a b c  =>  returns List<Token> [a,b,c]
    * a/b   => tokens a and b share the same spot (b.positionIncrement=0)
    * a,3/b/c => a,b,c all share same position (a.positionIncrement=3, b.positionIncrement=0, c.positionIncrement=0)
+   * a,1,10,11  => "a" with positionIncrement=1, startOffset=10, endOffset=11
    */
   public List tokens(String str) {
     String[] arr = str.split(" ");
@@ -50,8 +51,32 @@
     for (int i=0; i<arr.length; i++) {
       String[] toks = arr[i].split("/");
       String[] params = toks[0].split(",");
-      Token t = new Token(params[0],0,0,"TEST");
-      if (params.length > 1) t.setPositionIncrement(Integer.parseInt(params[1]));
+
+      int posInc;
+      int start;
+      int end;
+
+      if (params.length > 1) {
+        posInc = Integer.parseInt(params[1]);
+      } else {
+        posInc = 1;
+      }
+
+      if (params.length > 2) {
+        start = Integer.parseInt(params[2]);
+      } else {
+        start = 0;
+      }
+
+      if (params.length > 3) {
+        end = Integer.parseInt(params[3]);
+      } else {
+        end = start + params[0].length();
+      }
+
+      Token t = new Token(params[0],start,end,"TEST");
+      t.setPositionIncrement(posInc);
+      
       result.add(t);
       for (int j=1; j<toks.length; j++) {
         t = new Token(toks[j],0,0,"TEST");
@@ -91,27 +116,42 @@
 
 
   public void assertTokEqual(List a, List b) {
-    assertTokEq(a,b);
-    assertTokEq(b,a);
+    assertTokEq(a,b,false);
+    assertTokEq(b,a,false);
   }
 
-  private void assertTokEq(List a, List b) {
+  public void assertTokEqualOff(List a, List b) {
+    assertTokEq(a,b,true);
+    assertTokEq(b,a,true);
+  }
+
+  private void assertTokEq(List a, List b, boolean checkOff) {
     int pos=0;
     for (Iterator iter = a.iterator(); iter.hasNext();) {
       Token tok = (Token)iter.next();
       pos += tok.getPositionIncrement();
-      if (!tokAt(b, tok.termText(), pos)) {
+      if (!tokAt(b, tok.termText(), pos
+              , checkOff ? tok.startOffset() : -1
+              , checkOff ? tok.endOffset() : -1
+              )) 
+      {
         fail(a + "!=" + b);
       }
     }
   }
 
-  public boolean tokAt(List lst, String val, int tokPos) {
+  public boolean tokAt(List lst, String val, int tokPos, int startOff, int endOff) {
     int pos=0;
     for (Iterator iter = lst.iterator(); iter.hasNext();) {
       Token tok = (Token)iter.next();
       pos += tok.getPositionIncrement();
-      if (pos==tokPos && tok.termText().equals(val)) return true;
+      if (pos==tokPos && tok.termText().equals(val)
+          && (startOff==-1 || tok.startOffset()==startOff)
+          && (endOff==-1 || tok.endOffset()==endOff)
+           )
+      {
+        return true;
+      }
     }
     return false;
   }
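
The -1 sentinel is what lets a single tokAt() serve both assertion flavours:
assertTokEqual() passes -1 for both offsets, so only term text and position are
compared, while the new assertTokEqualOff() passes the real offsets and
requires them to match as well. A usage sketch, assuming the helpers above:

    // Illustrative only: the first assertion ignores offsets, the second
    // also requires startOffset/endOffset to line up on both sides.
    assertTokEqual(tokens("a,1,0,1"), tokens("a,1,5,9"));     // passes: offsets not compared
    assertTokEqualOff(tokens("a,1,0,1"), tokens("a,1,0,1"));  // passes: offsets match as well
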
@@ -281,5 +321,25 @@
     assertTokEqual(getTokList(map,"c,0",false), tokens("cc,0/c c2,2"));
   }
 
+
+  public void testOffsetBug() throws IOException {
+    // With the following rules:
+    // a a=>b
+    // x=>y
+    // analysing "a x" causes "y" to have a bad offset (end less than start)
+    // SOLR-167
+    SynonymMap map = new SynonymMap();
+
+    boolean orig = false;
+    boolean merge = true;
+
+    map.add(strings("a a"), tokens("b"), orig, merge);
+    map.add(strings("x"), tokens("y"), orig, merge);
+
+    System.out.println(getTokList(map,"a,1,0,1 a,1,2,3 x,1,4,5",false));
+
+    // "a a x" => "b y"
+    assertTokEqualOff(getTokList(map,"a,1,0,1 a,1,2,3 x,1,4,5",false), tokens("b,1,0,3 y,1,4,5"));
+  }
 
 }
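
To spell out the expected offsets in testOffsetBug:

    // input "a a x" as built by tokens():  a[0,1]  a[2,3]  x[4,5]
    // rule "a a=>b": "b" should span the whole matched text ->  b[0,3]
    // rule "x=>y":   "y" should keep the source offsets     ->  y[4,5]
    // Before this fix, "y" could come back with end < start (SOLR-167),
    // which is what assertTokEqualOff() now guards against.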