You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/01/10 13:33:30 UTC

svn commit: r1229519 - in /lucene/dev/trunk/modules/analysis/common/src: java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java

Author: simonw
Date: Tue Jan 10 12:33:29 2012
New Revision: 1229519

URL: http://svn.apache.org/viewvc?rev=1229519&view=rev
Log:
SOLR-3020: Add KeywordAttribute support to HunspellStemFilter

Added:
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java
Modified:
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java?rev=1229519&r1=1229518&r2=1229519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java Tue Jan 10 12:33:29 2012
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.TokenF
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.hunspell.HunspellStemmer.Stem;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
 /**
@@ -34,6 +35,7 @@ public final class HunspellStemFilter ex
   
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+  private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
   private final HunspellStemmer stemmer;
   
   private List<Stem> buffer;
@@ -84,6 +86,10 @@ public final class HunspellStemFilter ex
       return false;
     }
     
+    if (keywordAtt.isKeyword()) {
+      return true;
+    }
+    
     buffer = dedup ? stemmer.uniqueStems(termAtt.buffer(), termAtt.length()) : stemmer.stem(termAtt.buffer(), termAtt.length());
 
     if (buffer.isEmpty()) { // we do not know this word, return it unchanged

Added: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java?rev=1229519&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java (added)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java Tue Jan 10 12:33:29 2012
@@ -0,0 +1,61 @@
+package org.apache.lucene.analysis.hunspell;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;
+import org.junit.BeforeClass;
+
+public class HunspellStemFilterTest  extends BaseTokenStreamTestCase {
+  
+  private static HunspellDictionary DICTIONARY;
+  @BeforeClass
+  public static void beforeClass() throws IOException, ParseException {
+    DICTIONARY = createDict(true);
+  }
+  public static HunspellDictionary createDict(boolean ignoreCase) throws IOException, ParseException {
+    InputStream affixStream = HunspellStemmerTest.class.getResourceAsStream("test.aff");
+    InputStream dictStream = HunspellStemmerTest.class.getResourceAsStream("test.dic");
+
+    return new HunspellDictionary(affixStream, dictStream, Version.LUCENE_40, ignoreCase);
+  }
+  
+  /**
+   * Simple test for KeywordAttribute
+   */
+  public void testKeywordAttribute() throws IOException {
+    MockTokenizer tokenizer = new MockTokenizer(new StringReader("lucene is awesome"), MockTokenizer.WHITESPACE, true);
+    tokenizer.setEnableChecks(true);
+    HunspellStemFilter filter = new HunspellStemFilter(tokenizer, DICTIONARY);
+    assertTokenStreamContents(filter, new String[]{"lucene", "lucen", "is", "awesome"}, new int[] {1, 0, 1, 1});
+    
+    // assert with keywork marker
+    tokenizer = new MockTokenizer(new StringReader("lucene is awesome"), MockTokenizer.WHITESPACE, true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList("Lucene"), true);
+    filter = new HunspellStemFilter(new KeywordMarkerFilter(tokenizer, set), DICTIONARY);
+    assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
+  }
+}