You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2013/05/10 10:01:27 UTC

svn commit: r1480912 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/analysis/ lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/ lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/

Author: uschindler
Date: Fri May 10 08:01:26 2013
New Revision: 1480912

URL: http://svn.apache.org/r1480912
Log:
Merged revision(s) 1480911 from lucene/dev/trunk:
LUCENE-4993: Fix BeiderMorseFilter to preserve custom attributes when inserting tokens with position increment 0.

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java
    lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1480912&r1=1480911&r2=1480912&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Fri May 10 08:01:26 2013
@@ -92,6 +92,9 @@ Bug Fixes
 * LUCENE-4994: Fix PatternKeywordMarkerFilter to have public constructor.
   (Uwe Schindler)
   
+* LUCENE-4993: Fix BeiderMorseFilter to preserve custom attributes when
+  inserting tokens with position increment 0.  (Uwe Schindler)
+  
 Optimizations
 
 * LUCENE-4938: Don't use an unnecessarily large priority queue in IndexSearcher

Modified: lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java?rev=1480912&r1=1480911&r2=1480912&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java Fri May 10 08:01:26 2013
@@ -27,7 +27,6 @@ import org.apache.commons.codec.language
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
 /**
@@ -48,13 +47,11 @@ public final class BeiderMorseFilter ext
   private final Matcher matcher = pattern.matcher("");
   // encoded representation
   private String encoded;
-  // offsets for any buffered outputs
-  private int startOffset;
-  private int endOffset;
+  // preserves all attributes for any buffered outputs
+  private State state;
   
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
-  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
   
   
   /**
@@ -83,10 +80,10 @@ public final class BeiderMorseFilter ext
   @Override
   public boolean incrementToken() throws IOException {
     if (matcher.find()) {
-      clearAttributes();
+      assert state != null && encoded != null;
+      restoreState(state);
       termAtt.setEmpty().append(encoded, matcher.start(1), matcher.end(1));
       posIncAtt.setPositionIncrement(0);
-      offsetAtt.setOffset(startOffset, endOffset);
       return true;
     }
     
@@ -94,8 +91,7 @@ public final class BeiderMorseFilter ext
       encoded = (languages == null) 
           ? engine.encode(termAtt.toString())
           : engine.encode(termAtt.toString(), languages);
-      startOffset = offsetAtt.startOffset();
-      endOffset = offsetAtt.endOffset();
+      state = captureState();
       matcher.reset(encoded);
       if (matcher.find()) {
         termAtt.setEmpty().append(encoded, matcher.start(1), matcher.end(1));

Modified: lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java?rev=1480912&r1=1480911&r2=1480912&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java Fri May 10 08:01:26 2013
@@ -19,7 +19,9 @@ package org.apache.lucene.analysis.phone
 
 import java.io.IOException;
 import java.io.Reader;
+import java.io.StringReader;
 import java.util.HashSet;
+import java.util.regex.Pattern;
 
 import org.apache.commons.codec.language.bm.NameType;
 import org.apache.commons.codec.language.bm.PhoneticEngine;
@@ -29,7 +31,10 @@ import org.apache.lucene.analysis.Analyz
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 import org.junit.Ignore;
 
 /** Tests {@link BeiderMorseFilter} */
@@ -103,4 +108,20 @@ public class TestBeiderMorseFilter exten
     };
     checkOneTermReuse(a, "", "");
   }
+  
+  public void testCustomAttribute() throws IOException {
+    TokenStream stream = new KeywordTokenizer(new StringReader("D'Angelo"));
+    stream = new PatternKeywordMarkerFilter(stream, Pattern.compile(".*"));
+    stream = new BeiderMorseFilter(stream, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true));
+    KeywordAttribute keyAtt = stream.addAttribute(KeywordAttribute.class);
+    stream.reset();
+    int i = 0;
+    while(stream.incrementToken()) {
+      assertTrue(keyAtt.isKeyword());
+      i++;
+    }
+    assertEquals(12, i);
+    stream.end();
+    stream.close();
+  }
 }