You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2006/01/30 01:00:01 UTC

svn commit: r373402 - in /incubator/solr/trunk/src/test/org: ./ apache/ apache/solr/ apache/solr/analysis/ apache/solr/analysis/TestSynonymFilter.java

Author: yoavs
Date: Sun Jan 29 15:59:59 2006
New Revision: 373402

URL: http://svn.apache.org/viewcvs?rev=373402&view=rev
Log:
Moved the one test class from src/normal stuff to src/test.  Still need to rename the org.apache.lucene package classes to org.apache.solr (or do we want org.apache.lucene.solr?)

Added:
    incubator/solr/trunk/src/test/org/
    incubator/solr/trunk/src/test/org/apache/
    incubator/solr/trunk/src/test/org/apache/solr/
    incubator/solr/trunk/src/test/org/apache/solr/analysis/
    incubator/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java

Added: incubator/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java
URL: http://svn.apache.org/viewcvs/incubator/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java?rev=373402&view=auto
==============================================================================
--- incubator/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java (added)
+++ incubator/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java Sun Jan 29 15:59:59 2006
@@ -0,0 +1,282 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analysis;
+
+import junit.framework.TestCase;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * @author yonik
+ * @version $Id: TestSynonymFilter.java,v 1.2 2005/12/12 18:07:23 yonik Exp $
+ */
+public class TestSynonymFilter extends TestCase {
+
+  public List strings(String str) { // splits str on single spaces; returns the pieces as a List backed by the array
+    String[] arr = str.split(" ");
+    return Arrays.asList(arr);
+  }
+
+  /**
+   * Return a list of tokens according to a test string format:
+   * a b c  =>  returns List<Token> [a,b,c]
+   * a/b   => tokens a and b share the same spot (b.positionIncrement=0)
+   * a,3/b/c => a,b,c all share same position (a.positionIncrement=3, b.positionIncrement=0, c.positionIncrement=0)
+   */
+  public List tokens(String str) {
+    String[] arr = str.split(" "); // one entry per position group
+    List result = new ArrayList();
+    for (int i=0; i<arr.length; i++) {
+      String[] toks = arr[i].split("/"); // "/" separates tokens stacked on the same position
+      String[] params = toks[0].split(","); // only the first token of a group is parsed for ",N"
+      Token t = new Token(params[0],0,0,"TEST"); // presumably (text, start, end, type) -- confirm against Token
+      if (params.length > 1) t.setPositionIncrement(Integer.parseInt(params[1]));
+      result.add(t);
+      for (int j=1; j<toks.length; j++) {
+        t = new Token(toks[j],0,0,"TEST"); // not split on ",": a ",N" here would stay in the term text
+        t.setPositionIncrement(0);
+        result.add(t);
+      }
+    }
+    return result;
+  }
+
+  public List getTokList(SynonymMap dict, String input, boolean includeOrig) throws IOException { // runs tokens(input) through a SynonymFilter over dict and returns what it emits
+    ArrayList lst = new ArrayList();
+    final List toks = tokens(input);
+    TokenStream ts = new TokenStream() { // in-memory stream that replays toks
+      Iterator iter = toks.iterator();
+      public Token next() throws IOException {
+        return iter.hasNext() ? (Token)iter.next() : null; // null once toks is exhausted
+      }
+    };
+
+    SynonymFilter sf = new SynonymFilter(ts, dict, true); // NOTE(review): includeOrig is never read; third argument is hard-coded to true -- confirm what it controls
+
+    while(true) {
+      Token t = sf.next();
+      if (t==null) return lst; // filter returned null: hand back everything collected so far
+      lst.add(t);
+    }
+  }
+
+  public List tok2str(List tokLst) { // returns the termText() of each Token in tokLst, in order
+    ArrayList lst = new ArrayList();
+    for (Iterator iter = tokLst.iterator(); iter.hasNext();) {
+      lst.add(((Token)(iter.next())).termText());
+    }
+    return lst;
+  }
+
+
+  public void assertTokEqual(List a, List b) { // two-way containment of (term, position) pairs; order and duplicates are not compared
+    assertTokEq(a,b); // every token of a must be found in b
+    assertTokEq(b,a); // and every token of b must be found in a
+  }
+
+  private void assertTokEq(List a, List b) { // fails unless every token of a occurs in b with the same text and position
+    int pos=0; // running sum of position increments seen so far in a
+    for (Iterator iter = a.iterator(); iter.hasNext();) {
+      Token tok = (Token)iter.next();
+      pos += tok.getPositionIncrement();
+      if (!tokAt(b, tok.termText(), pos)) {
+        fail(a + "!=" + b); // failure message is the two lists joined by "!="
+      }
+    }
+  }
+
+  public boolean tokAt(List lst, String val, int tokPos) { // true if lst has a token whose text equals val at summed position tokPos
+    int pos=0; // running sum of position increments seen so far in lst
+    for (Iterator iter = lst.iterator(); iter.hasNext();) {
+      Token tok = (Token)iter.next();
+      pos += tok.getPositionIncrement();
+      if (pos==tokPos && tok.termText().equals(val)) return true;
+    }
+    return false;
+  }
+
+
+  public void testMatching() throws IOException { // rule matching with orig=false: expected output holds only the replacement tokens
+    SynonymMap map = new SynonymMap();
+
+    boolean orig = false; // passed as the third argument of every map.add below
+    boolean merge = true; // passed as the fourth argument of every map.add below
+    map.add(strings("a b"), tokens("ab"), orig, merge);
+    map.add(strings("a c"), tokens("ac"), orig, merge);
+    map.add(strings("a"), tokens("aa"), orig, merge);
+    map.add(strings("b"), tokens("bb"), orig, merge);
+    map.add(strings("z x c v"), tokens("zxcv"), orig, merge);
+    map.add(strings("x c"), tokens("xc"), orig, merge);
+
+    // System.out.println(map);
+    // System.out.println(getTokList(map,"a",false));
+
+    assertTokEqual(getTokList(map,"$",false), tokens("$")); // token with no rule is expected unchanged
+    assertTokEqual(getTokList(map,"a",false), tokens("aa"));
+    assertTokEqual(getTokList(map,"a $",false), tokens("aa $"));
+    assertTokEqual(getTokList(map,"$ a",false), tokens("$ aa"));
+    assertTokEqual(getTokList(map,"a a",false), tokens("aa aa"));
+    assertTokEqual(getTokList(map,"b",false), tokens("bb"));
+    assertTokEqual(getTokList(map,"z x c v",false), tokens("zxcv")); // the four-token rule is expected, not "x c"
+    assertTokEqual(getTokList(map,"z x c $",false), tokens("z xc $")); // "z x c v" cannot complete, so only "x c" is rewritten
+
+    // repeats: adding the same rule again still yields ab for "a b"
+    map.add(strings("a b"), tokens("ab"), orig, merge);
+    map.add(strings("a b"), tokens("ab"), orig, merge);
+    assertTokEqual(getTokList(map,"a b",false), tokens("ab"));
+
+    // check for lack of recursion (replacement tokens are not expected to be matched again)
+    map.add(strings("zoo"), tokens("zoo"), orig, merge);
+    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo zoo $ zoo"));
+    map.add(strings("zoo"), tokens("zoo zoo"), orig, merge);
+    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo zoo zoo zoo $ zoo zoo")); // each input zoo expands once to two
+  }
+
+  public void testIncludeOrig() throws IOException { // same rules as testMatching but added with orig=true: expected output keeps the input tokens too
+    SynonymMap map = new SynonymMap();
+
+    boolean orig = true; // passed as the third argument of every map.add below
+    boolean merge = true; // passed as the fourth argument of every map.add below
+    map.add(strings("a b"), tokens("ab"), orig, merge);
+    map.add(strings("a c"), tokens("ac"), orig, merge);
+    map.add(strings("a"), tokens("aa"), orig, merge);
+    map.add(strings("b"), tokens("bb"), orig, merge);
+    map.add(strings("z x c v"), tokens("zxcv"), orig, merge);
+    map.add(strings("x c"), tokens("xc"), orig, merge);
+
+    // System.out.println(map);
+    // System.out.println(getTokList(map,"a",false));
+
+    assertTokEqual(getTokList(map,"$",false), tokens("$"));
+    assertTokEqual(getTokList(map,"a",false), tokens("a/aa")); // original a expected with aa on the same position
+    assertTokEqual(getTokList(map,"a",false), tokens("a/aa")); // NOTE(review): exact duplicate of the previous assertion
+    assertTokEqual(getTokList(map,"$ a",false), tokens("$ a/aa"));
+    assertTokEqual(getTokList(map,"a $",false), tokens("a/aa $"));
+    assertTokEqual(getTokList(map,"$ a !",false), tokens("$ a/aa !"));
+    assertTokEqual(getTokList(map,"a a",false), tokens("a/aa a/aa"));
+    assertTokEqual(getTokList(map,"b",false), tokens("b/bb"));
+    assertTokEqual(getTokList(map,"z x c v",false), tokens("z/zxcv x c v")); // zxcv expected on the position of z
+    assertTokEqual(getTokList(map,"z x c $",false), tokens("z x/xc c $")); // xc expected on the position of x
+
+    // check for lack of recursion (replacement tokens are not expected to be matched again)
+    map.add(strings("zoo zoo"), tokens("zoo"), orig, merge);
+    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo/zoo zoo/zoo $ zoo/zoo")); // NOTE(review): assertTokEqual cannot tell zoo/zoo from a single zoo
+    map.add(strings("zoo"), tokens("zoo zoo"), orig, merge);
+    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo/zoo zoo $ zoo/zoo zoo"));
+  }
+
+
+  public void testMapMerge() throws IOException { // several add() calls for one key: expected output interleaves the replacements by position
+    SynonymMap map = new SynonymMap();
+
+    boolean orig = false;
+    boolean merge = true;
+    map.add(strings("a"), tokens("a5,5"), orig, merge);
+    map.add(strings("a"), tokens("a3,3"), orig, merge);
+    // System.out.println(map);
+    assertTokEqual(getTokList(map,"a",false), tokens("a3 a5,2")); // a3 expected before a5 although a5 was added first
+
+    map.add(strings("b"), tokens("b3,3"), orig, merge);
+    map.add(strings("b"), tokens("b5,5"), orig, merge);
+    //System.out.println(map);
+    assertTokEqual(getTokList(map,"b",false), tokens("b3 b5,2")); // same shape when added in ascending order
+
+
+    map.add(strings("a"), tokens("A3,3"), orig, merge);
+    map.add(strings("a"), tokens("A5,5"), orig, merge);
+    assertTokEqual(getTokList(map,"a",false), tokens("a3/A3 a5,2/A5")); // equal positions are expected stacked
+
+    map.add(strings("a"), tokens("a1"), orig, merge);
+    assertTokEqual(getTokList(map,"a",false), tokens("a1 a3,2/A3 a5,2/A5"));
+
+    map.add(strings("a"), tokens("a2,2"), orig, merge);
+    map.add(strings("a"), tokens("a4,4 a6,2"), orig, merge);
+    assertTokEqual(getTokList(map,"a",false), tokens("a1 a2 a3/A3 a4 a5/A5 a6")); // six consecutive positions
+  }
+
+
+  public void testOverlap() throws IOException { // several replacements for one key that land on the same position
+    SynonymMap map = new SynonymMap();
+
+    boolean orig = false;
+    boolean merge = true;
+    map.add(strings("qwe"), tokens("qq/ww/ee"), orig, merge);
+    map.add(strings("qwe"), tokens("xx"), orig, merge);
+    map.add(strings("qwe"), tokens("yy"), orig, merge);
+    map.add(strings("qwe"), tokens("zz"), orig, merge);
+    assertTokEqual(getTokList(map,"$",false), tokens("$"));
+    assertTokEqual(getTokList(map,"qwe",false), tokens("qq/ww/ee/xx/yy/zz")); // all six expected on one position
+
+    // test merging within the map
+
+    map.add(strings("a"), tokens("a5,5 a8,3 a10,2"), orig, merge);
+    map.add(strings("a"), tokens("a3,3 a7,4 a9,2 a11,2 a111,100"), orig, merge);
+    assertTokEqual(getTokList(map,"a",false), tokens("a3 a5,2 a7,2 a8 a9 a10 a11 a111,100")); // the two sequences interleaved by position
+  }
+
+  public void testOffsets() throws IOException { // despite the name, checks positions: tokens() always passes 0,0 to Token and ",N" sets positionIncrement
+    SynonymMap map = new SynonymMap();
+
+    boolean orig = false;
+    boolean merge = true;
+
+    // test that generated tokens start at the same position as the original
+    map.add(strings("a"), tokens("aa"), orig, merge);
+    assertTokEqual(getTokList(map,"a,5",false), tokens("aa,5"));
+    assertTokEqual(getTokList(map,"a,0",false), tokens("aa,0"));
+
+    // test that the position increment of the first replacement is ignored (always takes the orig position increment)
+    map.add(strings("b"), tokens("bb,100"), orig, merge);
+    assertTokEqual(getTokList(map,"b,5",false), tokens("bb,5"));
+    assertTokEqual(getTokList(map,"b,0",false), tokens("bb,0"));
+
+    // test that subsequent tokens are adjusted accordingly
+    map.add(strings("c"), tokens("cc,100 c2,2"), orig, merge);
+    assertTokEqual(getTokList(map,"c,5",false), tokens("cc,5 c2,2")); // c2 expected 2 positions after cc
+    assertTokEqual(getTokList(map,"c,0",false), tokens("cc,0 c2,2"));
+
+  }
+
+
+  public void testOffsetsWithOrig() throws IOException { // testOffsets with orig=true; despite the name it checks positions (",N" sets positionIncrement)
+    SynonymMap map = new SynonymMap();
+
+    boolean orig = true;
+    boolean merge = true;
+
+    // test that generated tokens start at the same position as the original
+    map.add(strings("a"), tokens("aa"), orig, merge);
+    assertTokEqual(getTokList(map,"a,5",false), tokens("a,5/aa"));
+    assertTokEqual(getTokList(map,"a,0",false), tokens("a,0/aa"));
+
+    // test that the position increment of the first replacement is ignored (always takes the orig position increment)
+    map.add(strings("b"), tokens("bb,100"), orig, merge);
+    assertTokEqual(getTokList(map,"b,5",false), tokens("bb,5/b"));
+    assertTokEqual(getTokList(map,"b,0",false), tokens("bb,0/b"));
+
+    // test that subsequent tokens are adjusted accordingly
+    map.add(strings("c"), tokens("cc,100 c2,2"), orig, merge);
+    assertTokEqual(getTokList(map,"c,5",false), tokens("cc,5/c c2,2")); // original c expected on the position of cc
+    assertTokEqual(getTokList(map,"c,0",false), tokens("cc,0/c c2,2"));
+  }
+
+
+}



Re: svn commit: r373402 - in /incubator/solr/trunk/src/test/org: ./ apache/ apache/solr/ apache/solr/analysis/ apache/solr/analysis/TestSynonymFilter.java

Posted by Yonik Seeley <ys...@gmail.com>.
I'm fine with moving everything from lucene_extras into solr, except for
PublicFieldSortedHitQueue.  If the following patch is committed to
lucene
http://issues.apache.org/jira/browse/LUCENE-432
we could get rid of the entire lucene package in solr.

I won't be able to get to this for another day, so if anyone else
wants to do it, have at it.

-Yonik

On 1/30/06, Doug Cutting <cu...@apache.org> wrote:
> Yonik Seeley wrote:
> > Many of the things in the lucene package (FunctionQuery and
> > SynonymFilter) could be moved to org.apache.solr, and renamed to
> > org.apache.lucene if/when they officially become part of lucene.
> >
> > But the other reason for the org.apache.lucene package is for
> > accessing package-protected lucene stuff.  Currently there is just
> > PublicFieldSortedHitQueue, but there was more when we used Lucene 1.4.
>
> Everything that's not required to be in a lucene package for access
> reasons should be in org.apache.solr.  And we should try to fix Lucene
> so that nothing has to be in its packages.
>
> Doug
>

Re: svn commit: r373402 - in /incubator/solr/trunk/src/test/org: ./ apache/ apache/solr/ apache/solr/analysis/ apache/solr/analysis/TestSynonymFilter.java

Posted by Doug Cutting <cu...@apache.org>.
Yonik Seeley wrote:
> Many of the things in the lucene package (FunctionQuery and
> SynonymFilter) could be moved to org.apache.solr, and renamed to
> org.apache.lucene if/when they officially become part of lucene.
> 
> But the other reason for the org.apache.lucene package is for
> accessing package-protected lucene stuff.  Currently there is just
> PublicFieldSortedHitQueue, but there was more when we used Lucene 1.4.

Everything that's not required to be in a lucene package for access 
reasons should be in org.apache.solr.  And we should try to fix Lucene 
so that nothing has to be in its packages.

Doug

Re: svn commit: r373402 - in /incubator/solr/trunk/src/test/org: ./ apache/ apache/solr/ apache/solr/analysis/ apache/solr/analysis/TestSynonymFilter.java

Posted by Otis Gospodnetic <ot...@yahoo.com>.
Nutch is a Lucene sub-project, and it uses org.apache.nutch, not org.apache.lucene.nutch.
I'd follow that for consistency's sake, until we see a problem.

Otis

----- Original Message ----
From: Yonik Seeley <ys...@gmail.com>
To: solr-dev@lucene.apache.org
Cc: solr-commits@lucene.apache.org
Sent: Sun 29 Jan 2006 08:00:51 PM EST
Subject: Re: svn commit: r373402 - in /incubator/solr/trunk/src/test/org: ./ apache/ apache/solr/ apache/solr/analysis/ apache/solr/analysis/TestSynonymFilter.java

On 1/29/06, yoavs@apache.org <yo...@apache.org> wrote:
> Still need to rename the org.apache.lucene package classes to org.apache.solr
> (or do we want org.apache.lucene.solr?)

Many of the things in the lucene package (FunctionQuery and
SynonymFilter) could be moved to org.apache.solr, and renamed to
org.apache.lucene if/when they officially become part of lucene.

But the other reason for the org.apache.lucene package is for
accessing package-protected lucene stuff.  Currently there is just
PublicFieldSortedHitQueue, but there was more when we used Lucene 1.4.

Anyone have thoughts about that?  Should the lucene package in solr go
away? (after making FieldSortedHitQueue public, of course ;-)

-Yonik




Re: svn commit: r373402 - in /incubator/solr/trunk/src/test/org: ./ apache/ apache/solr/ apache/solr/analysis/ apache/solr/analysis/TestSynonymFilter.java

Posted by Yonik Seeley <ys...@gmail.com>.
On 1/29/06, yoavs@apache.org <yo...@apache.org> wrote:
> Still need to rename the org.apache.lucene package classes to org.apache.solr
> (or do we want org.apache.lucene.solr?)

Many of the things in the lucene package (FunctionQuery and
SynonymFilter) could be moved to org.apache.solr, and renamed to
org.apache.lucene if/when they officially become part of lucene.

But the other reason for the org.apache.lucene package is for
accessing package-protected lucene stuff.  Currently there is just
PublicFieldSortedHitQueue, but there was more when we used Lucene 1.4.

Anyone have thoughts about that?  Should the lucene package in solr go
away? (after making FieldSortedHitQueue public, of course ;-)

-Yonik

Re: svn commit: r373402 - in /incubator/solr/trunk/src/test/org: ./ apache/ apache/solr/ apache/solr/analysis/ apache/solr/analysis/TestSynonymFilter.java

Posted by Yonik Seeley <ys...@gmail.com>.
On 1/29/06, yoavs@apache.org <yo...@apache.org> wrote:
> Still need to rename the org.apache.lucene package classes to org.apache.solr
> (or do we want org.apache.lucene.solr?)

Many of the things in the lucene package (FunctionQuery and
SynonymFilter) could be moved to org.apache.solr, and renamed to
org.apache.lucene if/when they officially become part of lucene.

But the other reason for the org.apache.lucene package is for
accessing package-protected lucene stuff.  Currently there is just
PublicFieldSortedHitQueue, but there was more when we used Lucene 1.4.

Anyone have thoughts about that?  Should the lucene package in solr go
away? (after making FieldSortedHitQueue public, of course ;-)

-Yonik