You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/31 15:44:05 UTC

svn commit: r1065626 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/contrib/analyzers/common/src/java/ lucene/src/java/org/apache/lucene/analysis/ lucene/src/test/org/apache/lucene/analysis/ solr/

Author: rmuir
Date: Mon Jan 31 14:44:05 2011
New Revision: 1065626

URL: http://svn.apache.org/viewvc?rev=1065626&view=rev
Log:
LUCENE-2901: fix consistency of KeywordMarkerFilter; it should only set, never unset, the keyword attribute

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/KeywordMarkerFilter.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerFilter.java
    lucene/dev/branches/branch_3x/solr/   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1065626&r1=1065625&r2=1065626&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Mon Jan 31 14:44:05 2011
@@ -411,8 +411,8 @@ New features
   stopwords, and implement many analyzers in contrib with it.  
   (Simon Willnauer via Robert Muir)
   
-* LUCENE-2198: Support protected words in stemming TokenFilters using a
-  new KeywordAttribute.  (Simon Willnauer via Uwe Schindler)
+* LUCENE-2198, LUCENE-2901: Support protected words in stemming TokenFilters using a
+  new KeywordAttribute.  (Simon Willnauer, Drew Farris via Uwe Schindler)
   
 * LUCENE-2183, LUCENE-2240, LUCENE-2241: Added Unicode 4 support
   to CharTokenizer and its subclasses. CharTokenizer now has new

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/KeywordMarkerFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/KeywordMarkerFilter.java?rev=1065626&r1=1065625&r2=1065626&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/KeywordMarkerFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/KeywordMarkerFilter.java Mon Jan 31 14:44:05 2011
@@ -71,10 +71,12 @@ public final class KeywordMarkerFilter e
   @Override
   public final boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      keywordAttr.setKeyword(keywordSet.contains(termAtt.buffer(), 0,
-          termAtt.length()));
+      if (keywordSet.contains(termAtt.buffer(), 0, termAtt.length())) { 
+        keywordAttr.setKeyword(true);
+      }
       return true;
-    } else
+    } else {
       return false;
+    }
   }
 }

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerFilter.java?rev=1065626&r1=1065625&r2=1065626&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerFilter.java Mon Jan 31 14:44:05 2011
@@ -2,6 +2,7 @@ package org.apache.lucene.analysis;
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Locale;
 import java.util.Set;
@@ -52,6 +53,19 @@ public class TestKeywordMarkerFilter ext
             "The quIck browN LuceneFox Jumps")), set2)), output);
   }
 
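+  // LUCENE-2901: stacked KeywordMarkerFilters must only set the keyword flag, never unset one set by an earlier filter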
+  public void testComposition() throws Exception {   
+    TokenStream ts = new LowerCaseFilterMock(
+                     new KeywordMarkerFilter(
+                     new KeywordMarkerFilter(
+                     new WhitespaceTokenizer(TEST_VERSION_CURRENT,
+                     new StringReader("Dogs Trees Birds Houses")),
+                     new HashSet<String>(Arrays.asList(new String[] { "Birds", "Houses" }))), 
+                     new HashSet<String>(Arrays.asList(new String[] { "Dogs", "Trees" }))));
+    
+    assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" });
+  }
+  
   public static final class LowerCaseFilterMock extends TokenFilter {
 
     private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);