You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/02/09 22:45:42 UTC
svn commit: r1242543 - in
/lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji:
KuromojiAnalyzer.java KuromojiPartOfSpeechStopFilter.java
Author: rmuir
Date: Thu Feb 9 21:45:41 2012
New Revision: 1242543
URL: http://svn.apache.org/viewvc?rev=1242543&view=rev
Log:
LUCENE-3751: align default Japanese configurations for Lucene/Solr
Modified:
lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiAnalyzer.java
lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiPartOfSpeechStopFilter.java
Modified: lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiAnalyzer.java?rev=1242543&r1=1242542&r2=1242543&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiAnalyzer.java Thu Feb 9 21:45:41 2012
@@ -63,7 +63,7 @@ public class KuromojiAnalyzer extends St
static {
try {
- DEFAULT_STOP_SET = loadStopwordSet(false, KuromojiAnalyzer.class, "stopwords.txt", "#");
+ DEFAULT_STOP_SET = loadStopwordSet(true, KuromojiAnalyzer.class, "stopwords.txt", "#"); // ignore case
final CharArraySet tagset = loadStopwordSet(false, KuromojiAnalyzer.class, "stoptags.txt", "#");
DEFAULT_STOP_TAGS = new HashSet<String>();
for (Object element : tagset) {
@@ -71,9 +71,8 @@ public class KuromojiAnalyzer extends St
DEFAULT_STOP_TAGS.add(new String(chars));
}
} catch (IOException ex) {
- // default set should always be present as it is part of the
- // distribution (JAR)
- throw new RuntimeException("Unable to load default stopword set");
+ // default set should always be present as it is part of the distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword or stoptag set");
}
}
}
@@ -81,11 +80,11 @@ public class KuromojiAnalyzer extends St
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new KuromojiTokenizer(this.segmenter, reader);
- TokenStream stream = new LowerCaseFilter(matchVersion, tokenizer);
- stream = new CJKWidthFilter(stream);
+ TokenStream stream = new KuromojiBaseFormFilter(tokenizer);
stream = new KuromojiPartOfSpeechStopFilter(true, stream, stoptags);
+ stream = new CJKWidthFilter(stream);
stream = new StopFilter(matchVersion, stream, stopwords);
- stream = new KuromojiBaseFormFilter(stream);
+ stream = new LowerCaseFilter(matchVersion, stream);
return new TokenStreamComponents(tokenizer, stream);
}
}
Modified: lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiPartOfSpeechStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiPartOfSpeechStopFilter.java?rev=1242543&r1=1242542&r2=1242543&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiPartOfSpeechStopFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiPartOfSpeechStopFilter.java Thu Feb 9 21:45:41 2012
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.util.F
import org.apache.lucene.analysis.TokenStream;
/**
- * Removes tokens that match a set of POS tags.
+ * Removes tokens that match a set of part-of-speech tags.
*/
public final class KuromojiPartOfSpeechStopFilter extends FilteringTokenFilter {
private final Set<String> stopTags;