You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/02/05 13:41:13 UTC
svn commit: r1240710 - in /lucene/dev/trunk:
modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java
solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java
Author: rmuir
Date: Sun Feb 5 12:41:13 2012
New Revision: 1240710
URL: http://svn.apache.org/viewvc?rev=1240710&view=rev
Log:
LUCENE-3726: default Kuromoji to search mode
Modified:
lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java
Modified: lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java?rev=1240710&r1=1240709&r2=1240710&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java (original)
+++ lucene/dev/trunk/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java Sun Feb 5 12:41:13 2012
@@ -22,7 +22,11 @@ import java.util.ArrayList;
import java.util.EnumMap;
import java.util.List;
-import org.apache.lucene.analysis.kuromoji.dict.*;
+import org.apache.lucene.analysis.kuromoji.dict.ConnectionCosts;
+import org.apache.lucene.analysis.kuromoji.dict.Dictionary;
+import org.apache.lucene.analysis.kuromoji.dict.TokenInfoDictionary;
+import org.apache.lucene.analysis.kuromoji.dict.UnknownDictionary;
+import org.apache.lucene.analysis.kuromoji.dict.UserDictionary;
import org.apache.lucene.analysis.kuromoji.viterbi.GraphvizFormatter;
import org.apache.lucene.analysis.kuromoji.viterbi.Viterbi;
import org.apache.lucene.analysis.kuromoji.viterbi.ViterbiNode;
@@ -37,6 +41,8 @@ public class Segmenter {
NORMAL, SEARCH, EXTENDED
}
+ public static final Mode DEFAULT_MODE = Mode.SEARCH;
+
private final Viterbi viterbi;
private final EnumMap<Type, Dictionary> dictionaryMap = new EnumMap<Type, Dictionary>(Type.class);
@@ -44,31 +50,25 @@ public class Segmenter {
private final boolean split;
public Segmenter() {
- this(null, Mode.NORMAL, false);
+ this(null, DEFAULT_MODE, false);
}
- public Segmenter(UserDictionary userDictionary, Mode mode) {
- this(userDictionary, mode, false);
+ public Segmenter(Mode mode) {
+ this(null, mode, false);
}
public Segmenter(UserDictionary userDictionary) {
- this(userDictionary, Mode.NORMAL, false);
+ this(userDictionary, DEFAULT_MODE, false);
}
- public Segmenter(Mode mode) {
- this(null, mode, false);
+ public Segmenter(UserDictionary userDictionary, Mode mode) {
+ this(userDictionary, mode, false);
}
public Segmenter(UserDictionary userDictionary, Mode mode, boolean split) {
-
final TokenInfoDictionary dict = TokenInfoDictionary.getInstance();
final UnknownDictionary unknownDict = UnknownDictionary.getInstance();
- this.viterbi = new Viterbi(dict,
- unknownDict,
- ConnectionCosts.getInstance(),
- userDictionary,
- mode);
-
+ this.viterbi = new Viterbi(dict, unknownDict, ConnectionCosts.getInstance(), userDictionary, mode);
this.split = split;
dictionaryMap.put(Type.KNOWN, dict);
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java?rev=1240710&r1=1240709&r2=1240710&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java Sun Feb 5 12:41:13 2012
@@ -24,6 +24,7 @@ import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.Locale;
+import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.kuromoji.KuromojiTokenizer;
@@ -62,7 +63,7 @@ public class KuromojiTokenizerFactory ex
@Override
public void inform(ResourceLoader loader) {
- Mode mode = args.get(MODE) != null ? Mode.valueOf(args.get(MODE).toUpperCase(Locale.ENGLISH)) : Mode.NORMAL;
+ Mode mode = getMode(args);
String userDictionaryPath = args.get(USER_DICT_PATH);
try {
if (userDictionaryPath != null) {
@@ -88,4 +89,13 @@ public class KuromojiTokenizerFactory ex
public Tokenizer create(Reader input) {
return new KuromojiTokenizer(segmenter, input);
}
-}
\ No newline at end of file
+
+ private Mode getMode(Map<String, String> args) {
+ String mode = args.get(MODE);
+ if (mode != null) {
+ return Mode.valueOf(mode.toUpperCase(Locale.ENGLISH));
+ } else {
+ return Segmenter.DEFAULT_MODE;
+ }
+ }
+}