You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/09/27 18:27:32 UTC

svn commit: r1391085 - in /lucene/dev/branches/lucene_solr_4_0: ./ lucene/ lucene/analysis/ lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/ lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/

Author: mikemccand
Date: Thu Sep 27 16:27:31 2012
New Revision: 1391085

URL: http://svn.apache.org/viewvc?rev=1391085&view=rev
Log:
Fix test failure: posLen (the position length of the emitted synonym token) must be 1 when keepOrig is false for a synonym match.

Modified:
    lucene/dev/branches/lucene_solr_4_0/   (props changed)
    lucene/dev/branches/lucene_solr_4_0/lucene/   (props changed)
    lucene/dev/branches/lucene_solr_4_0/lucene/analysis/   (props changed)
    lucene/dev/branches/lucene_solr_4_0/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
    lucene/dev/branches/lucene_solr_4_0/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java

Modified: lucene/dev/branches/lucene_solr_4_0/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_0/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java?rev=1391085&r1=1391084&r2=1391085&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_0/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java (original)
+++ lucene/dev/branches/lucene_solr_4_0/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java Thu Sep 27 16:27:31 2012
@@ -480,7 +480,7 @@ public final class SynonymFilter extends
             // endOffset (ie, endOffset of the last input
             // token it matched):
             endOffset = matchEndOffset;
-            posLen = matchInputLength;
+            posLen = keepOrig ? matchInputLength : 1;
           } else {
             // This rule has more than one output token; we
             // can't pick any particular endOffset for this

Modified: lucene/dev/branches/lucene_solr_4_0/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_0/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java?rev=1391085&r1=1391084&r2=1391085&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_0/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java (original)
+++ lucene/dev/branches/lucene_solr_4_0/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java Thu Sep 27 16:27:31 2012
@@ -40,7 +40,6 @@ import org.apache.lucene.analysis.core.K
 import org.apache.lucene.analysis.tokenattributes.*;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.util.LuceneTestCase.Slow;
 
 public class TestSynonymMapFilter extends BaseTokenStreamTestCase {
 
@@ -53,6 +52,9 @@ public class TestSynonymMapFilter extend
   private OffsetAttribute offsetAtt;
 
   private void add(String input, String output, boolean keepOrig) {
+    if (VERBOSE) {
+      System.out.println("  add input=" + input + " output=" + output + " keepOrig=" + keepOrig);
+    }
     b.add(new CharsRef(input.replaceAll(" +", "\u0000")),
           new CharsRef(output.replaceAll(" +", "\u0000")),
           keepOrig);
@@ -137,6 +139,56 @@ public class TestSynonymMapFilter extend
     assertEquals(expectedUpto, expected.length);
   }
 
+  public void testDontKeepOrig() throws Exception {
+    b = new SynonymMap.Builder(true);
+    add("a b", "foo", false);
+
+    final SynonymMap map = b.build();
+
+    final Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
+      }
+    };
+
+    assertAnalyzesTo(analyzer, "a b c",
+                     new String[] {"foo", "c"},
+                     new int[] {0, 4},
+                     new int[] {3, 5},
+                     null,
+                     new int[] {1, 1},
+                     new int[] {1, 1},
+                     true);
+    checkAnalysisConsistency(random(), analyzer, false, "a b c");
+  }
+
+  public void testDoKeepOrig() throws Exception {
+    b = new SynonymMap.Builder(true);
+    add("a b", "foo", true);
+
+    final SynonymMap map = b.build();
+
+    final Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
+      }
+    };
+
+    assertAnalyzesTo(analyzer, "a b c",
+                     new String[] {"a", "foo", "b", "c"},
+                     new int[] {0, 0, 2, 4},
+                     new int[] {1, 3, 3, 5},
+                     null,
+                     new int[] {1, 0, 1, 1},
+                     new int[] {1, 2, 1, 1},
+                     true);
+    checkAnalysisConsistency(random(), analyzer, false, "a b c");
+  }
+
   public void testBasic() throws Exception {
     b = new SynonymMap.Builder(true);
     add("a", "foo", true);
@@ -284,7 +336,7 @@ public class TestSynonymMapFilter extend
             if (synOutputs.length == 1) {
               // Add full endOffset
               endOffset = (inputIDX*2) + syn.in.length();
-              posLen = (1+syn.in.length())/2;
+              posLen = syn.keepOrig ? (1+syn.in.length())/2 : 1;
             } else {
               // Add endOffset matching input token's
               endOffset = (matchIDX*2) + 1;
@@ -540,6 +592,9 @@ public class TestSynonymMapFilter extend
     for (int i = 0; i < numIters; i++) {
       b = new SynonymMap.Builder(random.nextBoolean());
       final int numEntries = atLeast(10);
+      if (VERBOSE) {
+        System.out.println("TEST: iter=" + i + " numEntries=" + numEntries);
+      }
       for (int j = 0; j < numEntries; j++) {
         add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
       }