You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/02/05 13:43:47 UTC

svn commit: r1240711 - in /lucene/dev/branches/branch_3x: ./ lucene/contrib/analyzers/kuromoji/ lucene/contrib/analyzers/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/ solr/ solr/core/ solr/core/src/java/org/apache/solr/analysis/

Author: rmuir
Date: Sun Feb  5 12:43:46 2012
New Revision: 1240711

URL: http://svn.apache.org/viewvc?rev=1240711&view=rev
Log:
LUCENE-3726: default Kuromoji to search mode

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java
    lucene/dev/branches/branch_3x/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/core/   (props changed)
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java?rev=1240711&r1=1240710&r2=1240711&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java Sun Feb  5 12:43:46 2012
@@ -22,7 +22,11 @@ import java.util.ArrayList;
 import java.util.EnumMap;
 import java.util.List;
 
-import org.apache.lucene.analysis.kuromoji.dict.*;
+import org.apache.lucene.analysis.kuromoji.dict.ConnectionCosts;
+import org.apache.lucene.analysis.kuromoji.dict.Dictionary;
+import org.apache.lucene.analysis.kuromoji.dict.TokenInfoDictionary;
+import org.apache.lucene.analysis.kuromoji.dict.UnknownDictionary;
+import org.apache.lucene.analysis.kuromoji.dict.UserDictionary;
 import org.apache.lucene.analysis.kuromoji.viterbi.GraphvizFormatter;
 import org.apache.lucene.analysis.kuromoji.viterbi.Viterbi;
 import org.apache.lucene.analysis.kuromoji.viterbi.ViterbiNode;
@@ -37,6 +41,8 @@ public class Segmenter {
     NORMAL, SEARCH, EXTENDED
   }
   
+  public static final Mode DEFAULT_MODE = Mode.SEARCH;
+  
   private final Viterbi viterbi;
   
   private final EnumMap<Type, Dictionary> dictionaryMap = new EnumMap<Type, Dictionary>(Type.class);
@@ -44,31 +50,25 @@ public class Segmenter {
   private final boolean split;
   
   public Segmenter() {
-    this(null, Mode.NORMAL, false);
+    this(null, DEFAULT_MODE, false);
   }
 
-  public Segmenter(UserDictionary userDictionary, Mode mode) {
-    this(userDictionary, mode, false);
+  public Segmenter(Mode mode) {
+    this(null, mode, false);
   }
 
   public Segmenter(UserDictionary userDictionary) {
-    this(userDictionary, Mode.NORMAL, false);
+    this(userDictionary, DEFAULT_MODE, false);
   }
 
-  public Segmenter(Mode mode) {
-    this(null, mode, false);
+  public Segmenter(UserDictionary userDictionary, Mode mode) {
+    this(userDictionary, mode, false);
   }
 
   public Segmenter(UserDictionary userDictionary, Mode mode, boolean split) {
-    
     final TokenInfoDictionary dict = TokenInfoDictionary.getInstance();
     final UnknownDictionary unknownDict = UnknownDictionary.getInstance();
-    this.viterbi = new Viterbi(dict,
-        unknownDict,
-        ConnectionCosts.getInstance(),
-        userDictionary,
-        mode);
-    
+    this.viterbi = new Viterbi(dict, unknownDict, ConnectionCosts.getInstance(), userDictionary, mode);
     this.split = split;
     
     dictionaryMap.put(Type.KNOWN, dict);

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java?rev=1240711&r1=1240710&r2=1240711&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java Sun Feb  5 12:43:46 2012
@@ -24,6 +24,7 @@ import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CodingErrorAction;
 import java.util.Locale;
+import java.util.Map;
 
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.kuromoji.KuromojiTokenizer;
@@ -62,7 +63,7 @@ public class KuromojiTokenizerFactory ex
   
   //@Override
   public void inform(ResourceLoader loader) {
-    Mode mode = args.get(MODE) != null ? Mode.valueOf(args.get(MODE).toUpperCase(Locale.ENGLISH)) : Mode.NORMAL;
+    Mode mode = getMode(args);
     String userDictionaryPath = args.get(USER_DICT_PATH);
     try {
       if (userDictionaryPath != null) {
@@ -88,4 +89,13 @@ public class KuromojiTokenizerFactory ex
   public Tokenizer create(Reader input) {
     return new KuromojiTokenizer(segmenter, input);
   }
-}
\ No newline at end of file
+  
+  private Mode getMode(Map<String, String> args) {
+    String mode = args.get(MODE);
+    if (mode != null) {
+      return Mode.valueOf(mode.toUpperCase(Locale.ENGLISH));
+    } else {
+      return Segmenter.DEFAULT_MODE;
+    }
+  }
+}