You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2013/05/10 10:01:27 UTC
svn commit: r1480912 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/analysis/
lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/
lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/
Author: uschindler
Date: Fri May 10 08:01:26 2013
New Revision: 1480912
URL: http://svn.apache.org/r1480912
Log:
Merged revision(s) 1480911 from lucene/dev/trunk:
LUCENE-4993: Fix BeiderMorseFilter to preserve custom attributes when inserting tokens with position increment 0.
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/lucene/analysis/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java
lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1480912&r1=1480911&r2=1480912&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Fri May 10 08:01:26 2013
@@ -92,6 +92,9 @@ Bug Fixes
* LUCENE-4994: Fix PatternKeywordMarkerFilter to have public constructor.
(Uwe Schindler)
+* LUCENE-4993: Fix BeiderMorseFilter to preserve custom attributes when
+ inserting tokens with position increment 0. (Uwe Schindler)
+
Optimizations
* LUCENE-4938: Don't use an unnecessarily large priority queue in IndexSearcher
Modified: lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java?rev=1480912&r1=1480911&r2=1480912&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java Fri May 10 08:01:26 2013
@@ -27,7 +27,6 @@ import org.apache.commons.codec.language
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/**
@@ -48,13 +47,11 @@ public final class BeiderMorseFilter ext
private final Matcher matcher = pattern.matcher("");
// encoded representation
private String encoded;
- // offsets for any buffered outputs
- private int startOffset;
- private int endOffset;
+ // preserves all attributes for any buffered outputs
+ private State state;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
- private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
/**
@@ -83,10 +80,10 @@ public final class BeiderMorseFilter ext
@Override
public boolean incrementToken() throws IOException {
if (matcher.find()) {
- clearAttributes();
+ assert state != null && encoded != null;
+ restoreState(state);
termAtt.setEmpty().append(encoded, matcher.start(1), matcher.end(1));
posIncAtt.setPositionIncrement(0);
- offsetAtt.setOffset(startOffset, endOffset);
return true;
}
@@ -94,8 +91,7 @@ public final class BeiderMorseFilter ext
encoded = (languages == null)
? engine.encode(termAtt.toString())
: engine.encode(termAtt.toString(), languages);
- startOffset = offsetAtt.startOffset();
- endOffset = offsetAtt.endOffset();
+ state = captureState();
matcher.reset(encoded);
if (matcher.find()) {
termAtt.setEmpty().append(encoded, matcher.start(1), matcher.end(1));
Modified: lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java?rev=1480912&r1=1480911&r2=1480912&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java Fri May 10 08:01:26 2013
@@ -19,7 +19,9 @@ package org.apache.lucene.analysis.phone
import java.io.IOException;
import java.io.Reader;
+import java.io.StringReader;
import java.util.HashSet;
+import java.util.regex.Pattern;
import org.apache.commons.codec.language.bm.NameType;
import org.apache.commons.codec.language.bm.PhoneticEngine;
@@ -29,7 +31,10 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.junit.Ignore;
/** Tests {@link BeiderMorseFilter} */
@@ -103,4 +108,20 @@ public class TestBeiderMorseFilter exten
};
checkOneTermReuse(a, "", "");
}
+
+ public void testCustomAttribute() throws IOException {
+ TokenStream stream = new KeywordTokenizer(new StringReader("D'Angelo"));
+ stream = new PatternKeywordMarkerFilter(stream, Pattern.compile(".*"));
+ stream = new BeiderMorseFilter(stream, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true));
+ KeywordAttribute keyAtt = stream.addAttribute(KeywordAttribute.class);
+ stream.reset();
+ int i = 0;
+ while(stream.incrementToken()) {
+ assertTrue(keyAtt.isKeyword());
+ i++;
+ }
+ assertEquals(12, i);
+ stream.end();
+ stream.close();
+ }
}