You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/02/01 00:01:55 UTC
svn commit: r1238851 - in /lucene/dev/trunk: lucene/contrib/CHANGES.txt
modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
Author: mikemccand
Date: Tue Jan 31 23:01:55 2012
New Revision: 1238851
URL: http://svn.apache.org/viewvc?rev=1238851&view=rev
Log:
LUCENE-3742: fix token offset for hangs-off-end output in SynonymFilter
Modified:
lucene/dev/trunk/lucene/contrib/CHANGES.txt
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=1238851&r1=1238850&r2=1238851&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Tue Jan 31 23:01:55 2012
@@ -181,7 +181,6 @@ Bug Fixes
children (such docs will never match, but BJQ was tripping an
assert if such a parent doc was the first doc in the segment).
(Shay Banon, Mike McCandless)
-
* LUCENE-3609: Fix regression in BooleanFilter, introduced in Lucene 3.5,
to correctly handle minShouldMatch behaviour of previous versions.
(Shay Banon, Uwe Schindler)
@@ -194,6 +193,11 @@ Bug Fixes
cover all tokens it had matched. (Koji Sekiguchi, Robert Muir,
Mike McCandless)
+ * LUCENE-3742: When SynonymFilter has an output extending beyond the
+ input tokens, it now sets the start and end offset to the same
+ values as those of the last input token (not 0, 0). (Robert Muir,
+ Mike McCandless)
+
* LUCENE-3686: CategoryEnhancement must override Object.equals(Object).
(Sivan Yogev via Shai Erera)
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java?rev=1238851&r1=1238850&r2=1238851&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java Tue Jan 31 23:01:55 2012
@@ -290,6 +290,8 @@ public final class SynonymFilter extends
capture the state if no further tokens were checked. So
caller must then forward state to our caller, or capture:
*/
+ private int lastStartOffset;
+ private int lastEndOffset;
private void parse() throws IOException {
//System.out.println("\nS: parse");
@@ -338,8 +340,8 @@ public final class SynonymFilter extends
buffer = termAtt.buffer();
bufferLen = termAtt.length();
final PendingInput input = futureInputs[nextWrite];
- input.startOffset = offsetAtt.startOffset();
- input.endOffset = offsetAtt.endOffset();
+ lastStartOffset = input.startOffset = offsetAtt.startOffset();
+ lastEndOffset = input.endOffset = offsetAtt.endOffset();
inputEndOffset = input.endOffset;
//System.out.println(" new token=" + new String(buffer, 0, bufferLen));
if (nextRead != nextWrite) {
@@ -582,6 +584,8 @@ public final class SynonymFilter extends
nextWrite = nextRead = rollIncr(nextRead);
}
clearAttributes();
+ // Keep offset from last input token:
+ offsetAtt.setOffset(lastStartOffset, lastEndOffset);
termAtt.copyBuffer(output.chars, output.offset, output.length);
typeAtt.setType(TYPE_SYNONYM);
//System.out.println(" set posIncr=" + outputs.posIncr + " outputs=" + outputs);
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java?rev=1238851&r1=1238850&r2=1238851&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java Tue Jan 31 23:01:55 2012
@@ -606,6 +606,32 @@ public class TestSynonymMapFilter extend
new String[] { "zoo", "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo", "zoo" },
new int[] { 1, 0, 1, 0, 0, 1, 0, 1, 0, 1 });
}
+
+ public void testOutputHangsOffEnd() throws Exception { // rule "a" -> "a b" applied to the single-token input "a"
+ b = new SynonymMap.Builder(true);
+ final boolean keepOrig = false;
+ // The output "a b" is longer than the matched input "a", so b hangs off the end (no input token under it):
+ add("a", "a b", keepOrig);
+ final SynonymMap map = b.build(); // NOTE(review): map is never read in this method; the filter below calls b.build() again
+ tokensIn = new MockTokenizer(new StringReader("a"),
+ MockTokenizer.WHITESPACE,
+ true);
+ tokensIn.reset(); // run the bare tokenizer through one complete pass before wrapping it in the filter
+ assertTrue(tokensIn.incrementToken()); // exactly one token is expected ...
+ assertFalse(tokensIn.incrementToken()); // ... followed by end of stream
+ tokensIn.end();
+ tokensIn.close();
+
+ tokensOut = new SynonymFilter(tokensIn,
+ b.build(),
+ true);
+ termAtt = tokensOut.addAttribute(CharTermAttribute.class);
+ posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
+ offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);
+
+ // Make sure the hanging-off-end output inherits its offsets from the last input token (assumes verify() checks offsets via offsetAtt -- TODO confirm):
+ verify("a", "a b:1");
+ }
public void testIncludeOrig() throws Exception {
b = new SynonymMap.Builder(true);