You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/02/01 00:01:55 UTC

svn commit: r1238851 - in /lucene/dev/trunk: lucene/contrib/CHANGES.txt modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java

Author: mikemccand
Date: Tue Jan 31 23:01:55 2012
New Revision: 1238851

URL: http://svn.apache.org/viewvc?rev=1238851&view=rev
Log:
LUCENE-3742: fix token offset for hangs-off-end output in SynonymFilter

Modified:
    lucene/dev/trunk/lucene/contrib/CHANGES.txt
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java

Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=1238851&r1=1238850&r2=1238851&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Tue Jan 31 23:01:55 2012
@@ -181,7 +181,6 @@ Bug Fixes
    children (such docs will never match, but BJQ was tripping an
    assert if such a parent doc was the first doc in the segment).
    (Shay Banon, Mike McCandless)
-
  * LUCENE-3609: Fix regression in BooleanFilter, introduced in Lucene 3.5,
    to correctly handle minShouldMatch behaviour of previous versions.
    (Shay Banon, Uwe Schindler)
@@ -194,6 +193,11 @@ Bug Fixes
    cover all tokens it had matched.  (Koji Sekiguchi, Robert Muir,
    Mike McCandless)
 
+ * LUCENE-3742: When SynonymFilter has an output extending beyond the
+   input tokens, it now sets the start and end offsets to the same
+   values as the last input token (not 0, 0).  (Robert Muir, Mike
+   McCandless)
+
  * LUCENE-3686: CategoryEnhancement must override Object.equals(Object).
    (Sivan Yogev via Shai Erera)
 

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java?rev=1238851&r1=1238850&r2=1238851&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java Tue Jan 31 23:01:55 2012
@@ -290,6 +290,8 @@ public final class SynonymFilter extends
    capture the state if no further tokens were checked.  So
    caller must then forward state to our caller, or capture:
   */
+  private int lastStartOffset;
+  private int lastEndOffset;
 
   private void parse() throws IOException {
     //System.out.println("\nS: parse");
@@ -338,8 +340,8 @@ public final class SynonymFilter extends
             buffer = termAtt.buffer();
             bufferLen = termAtt.length();
             final PendingInput input = futureInputs[nextWrite];
-            input.startOffset = offsetAtt.startOffset();
-            input.endOffset = offsetAtt.endOffset();
+            lastStartOffset = input.startOffset = offsetAtt.startOffset();
+            lastEndOffset = input.endOffset = offsetAtt.endOffset();
             inputEndOffset = input.endOffset;
             //System.out.println("  new token=" + new String(buffer, 0, bufferLen));
             if (nextRead != nextWrite) {
@@ -582,6 +584,8 @@ public final class SynonymFilter extends
             nextWrite = nextRead = rollIncr(nextRead);
           }
           clearAttributes();
+          // Keep offset from last input token:
+          offsetAtt.setOffset(lastStartOffset, lastEndOffset);
           termAtt.copyBuffer(output.chars, output.offset, output.length);
           typeAtt.setType(TYPE_SYNONYM);
           //System.out.println("  set posIncr=" + outputs.posIncr + " outputs=" + outputs);

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java?rev=1238851&r1=1238850&r2=1238851&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java Tue Jan 31 23:01:55 2012
@@ -606,6 +606,32 @@ public class TestSynonymMapFilter extend
         new String[] { "zoo", "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo", "zoo" },
         new int[] { 1, 0, 1, 0, 0, 1, 0, 1, 0, 1 });
   }
+
+  public void testOutputHangsOffEnd() throws Exception {
+    b = new SynonymMap.Builder(true);
+    final boolean keepOrig = false;
+    // b hangs off the end (no input token under it):
+    add("a", "a b", keepOrig);
+    final SynonymMap map = b.build();
+    tokensIn = new MockTokenizer(new StringReader("a"),
+                                 MockTokenizer.WHITESPACE,
+                                 true);
+    tokensIn.reset();
+    assertTrue(tokensIn.incrementToken());
+    assertFalse(tokensIn.incrementToken());
+    tokensIn.end();
+    tokensIn.close();
+
+    tokensOut = new SynonymFilter(tokensIn,
+                                     b.build(),
+                                     true);
+    termAtt = tokensOut.addAttribute(CharTermAttribute.class);
+    posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
+    offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);
+
+    // Make sure endOffset inherits from previous input token:
+    verify("a", "a b:1");
+  }
   
   public void testIncludeOrig() throws Exception {
     b = new SynonymMap.Builder(true);