You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/01/07 23:36:34 UTC
svn commit: r1228746 - in
/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src:
java/org/apache/lucene/analysis/kuromoji/
java/org/apache/lucene/analysis/kuromoji/viterbi/
test/org/apache/lucene/analysis/kuromoji/ test/org/apache/lucene/analys...
Author: uschindler
Date: Sat Jan 7 22:36:34 2012
New Revision: 1228746
URL: http://svn.apache.org/viewvc?rev=1228746&view=rev
Log:
LUCENE-3305: Rename Tokenizer to Segmenter and remove the builders. Move the Graphviz code into Segmenter as debugTokenize, which lazily initializes the Graphviz component.
Added:
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java
- copied, changed from r1228735, lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Tokenizer.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SegmenterTest.java
- copied, changed from r1228735, lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TokenizerTest.java
Removed:
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/DebugTokenizer.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Tokenizer.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TokenizerTest.java
Modified:
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiAnalyzer.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/viterbi/Viterbi.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiBaseFormFilter.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestQuality.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/dict/UserDictionaryTest.java
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiAnalyzer.java?rev=1228746&r1=1228745&r2=1228746&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiAnalyzer.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiAnalyzer.java Sat Jan 7 22:36:34 2012
@@ -23,15 +23,15 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.Tokenizer;
public class KuromojiAnalyzer extends Analyzer {
- private final org.apache.lucene.analysis.kuromoji.Tokenizer tokenizer;
+ private final Segmenter segmenter;
- public KuromojiAnalyzer(org.apache.lucene.analysis.kuromoji.Tokenizer tokenizer) {
- this.tokenizer = tokenizer;
+ public KuromojiAnalyzer(Segmenter segmenter) {
+ this.segmenter = segmenter;
}
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KuromojiTokenizer(this.tokenizer, reader);
+ Tokenizer tokenizer = new KuromojiTokenizer(this.segmenter, reader);
return new TokenStreamComponents(tokenizer, tokenizer);
}
}
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java?rev=1228746&r1=1228745&r2=1228746&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java Sat Jan 7 22:36:34 2012
@@ -34,22 +34,22 @@ public final class KuromojiTokenizer ext
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final BaseFormAttribute basicFormAtt = addAttribute(BaseFormAttribute.class);
private final PartOfSpeechAttribute posAtt = addAttribute(PartOfSpeechAttribute.class);
- private final org.apache.lucene.analysis.kuromoji.Tokenizer tokenizer;
+ private final Segmenter segmenter;
private List<Token> tokens;
private int tokenIndex = 0;
private int sentenceStart = 0;
- public KuromojiTokenizer(org.apache.lucene.analysis.kuromoji.Tokenizer tokenizer, Reader input) {
+ public KuromojiTokenizer(Segmenter segmenter, Reader input) {
super(input, (BreakIterator) proto.clone());
- this.tokenizer = tokenizer;
+ this.segmenter = segmenter;
}
@Override
protected void setNextSentence(int sentenceStart, int sentenceEnd) {
this.sentenceStart = sentenceStart;
// TODO: maybe don't pass 0 here, so kuromoji tracks offsets for us?
- tokens = tokenizer.doTokenize(0, buffer, sentenceStart, sentenceEnd-sentenceStart);
+ tokens = segmenter.doTokenize(0, buffer, sentenceStart, sentenceEnd-sentenceStart);
tokenIndex = 0;
}
Copied: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java (from r1228735, lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Tokenizer.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java?p2=lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java&p1=lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Tokenizer.java&r1=1228735&r2=1228746&rev=1228746&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Tokenizer.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/Segmenter.java Sat Jan 7 22:36:34 2012
@@ -27,6 +27,7 @@ import java.util.EnumMap;
import java.util.List;
import org.apache.lucene.analysis.kuromoji.dict.*;
+import org.apache.lucene.analysis.kuromoji.viterbi.GraphvizFormatter;
import org.apache.lucene.analysis.kuromoji.viterbi.Viterbi;
import org.apache.lucene.analysis.kuromoji.viterbi.ViterbiNode;
import org.apache.lucene.analysis.kuromoji.viterbi.ViterbiNode.Type;
@@ -35,8 +36,8 @@ import org.apache.lucene.analysis.kuromo
* Tokenizer main class.
* Thread safe.
*/
-public class Tokenizer {
- public enum Mode {
+public class Segmenter {
+ public static enum Mode {
NORMAL, SEARCH, EXTENDED
}
@@ -46,10 +47,26 @@ public class Tokenizer {
private final boolean split;
- /**
- * Constructor
- */
- protected Tokenizer(UserDictionary userDictionary, Mode mode, boolean split) {
+ private final Object formatterLock = new Object();
+ private transient GraphvizFormatter formatter = null;
+
+ public Segmenter() {
+ this(null, Mode.NORMAL, false);
+ }
+
+ public Segmenter(UserDictionary userDictionary, Mode mode) {
+ this(userDictionary, mode, false);
+ }
+
+ public Segmenter(UserDictionary userDictionary) {
+ this(userDictionary, Mode.NORMAL, false);
+ }
+
+ public Segmenter(Mode mode) {
+ this(null, mode, false);
+ }
+
+ public Segmenter(UserDictionary userDictionary, Mode mode, boolean split) {
final TokenInfoDictionary dict = TokenInfoDictionary.getInstance();
final UnknownDictionary unknownDict = UnknownDictionary.getInstance();
@@ -157,80 +174,17 @@ public class Tokenizer {
return result;
}
- /**
- * Get Builder to create Tokenizer instance.
- * @return Builder
- */
- public static Builder builder() {
- return new Builder();
- }
-
- /**
- * Builder class used to create Tokenizer instance.
- */
- public static class Builder {
+ /** returns a Graphviz String */
+ public String debugTokenize(String text) {
+ synchronized(formatterLock) {
+ if (this.formatter == null) {
+ this.formatter = new GraphvizFormatter(ConnectionCosts.getInstance());
+ }
+ }
- private Mode mode = Mode.NORMAL;
+ ViterbiNode[][][] lattice = this.viterbi.build(text.toCharArray(), 0, text.length());
+ List<ViterbiNode> bestPath = this.viterbi.search(lattice);
- // this is true, for other use.
- // lucene's tokenizer uses a breakiterator and doTokenize directly.
- private boolean split = true;
-
- private UserDictionary userDictionary = null;
-
- /**
- * Set tokenization mode
- * Default: NORMAL
- * @param mode tokenization mode
- * @return Builder
- */
- public Builder mode(Mode mode) {
- this.mode = mode;
- return this;
- }
-
- /**
- * Set if tokenizer should split input string at "。" and "、" before tokenize to increase performance.
- * Splitting shouldn't change the result of tokenization most of the cases.
- * Default: true
- *
- * @param split whether tokenizer should split input string
- * @return Builder
- */
- public Builder split(boolean split) {
- this.split = split;
- return this;
- }
-
- /**
- * Set user dictionary input stream
- * @param userDictionaryReader dictionary file as {@link Reader}
- * @return Builder
- * @throws IOException
- */
- public Builder userDictionary(Reader userDictionaryReader) throws IOException {
- this.userDictionary = new UserDictionary(userDictionaryReader);
- return this;
- }
-
- /**
- * Set user dictionary path
- * @param userDictionaryPath path to dictionary file
- * @return Builder
- * @throws IOException
- * @throws FileNotFoundException
- */
- public Builder userDictionary(String userDictionaryPath) throws FileNotFoundException, IOException {
- this.userDictionary = new UserDictionary(userDictionaryPath);
- return this;
- }
-
- /**
- * Create Tokenizer instance
- * @return Tokenizer
- */
- public Tokenizer build() {
- return new Tokenizer(userDictionary, mode, split);
- }
+ return this.formatter.format(lattice[0], lattice[1], bestPath);
}
}
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/viterbi/Viterbi.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/viterbi/Viterbi.java?rev=1228746&r1=1228745&r2=1228746&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/viterbi/Viterbi.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/viterbi/Viterbi.java Sat Jan 7 22:36:34 2012
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.kurom
import java.util.LinkedList;
import java.util.List;
-import org.apache.lucene.analysis.kuromoji.Tokenizer.Mode;
+import org.apache.lucene.analysis.kuromoji.Segmenter.Mode;
import org.apache.lucene.analysis.kuromoji.dict.CharacterDefinition;
import org.apache.lucene.analysis.kuromoji.dict.ConnectionCosts;
import org.apache.lucene.analysis.kuromoji.dict.TokenInfoDictionary;
Copied: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SegmenterTest.java (from r1228735, lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TokenizerTest.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SegmenterTest.java?p2=lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SegmenterTest.java&p1=lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TokenizerTest.java&r1=1228735&r2=1228746&rev=1228746&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TokenizerTest.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SegmenterTest.java Sat Jan 7 22:36:34 2012
@@ -21,25 +21,23 @@ import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.util.List;
-import org.apache.lucene.analysis.kuromoji.Token;
-import org.apache.lucene.analysis.kuromoji.Tokenizer;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
-public class TokenizerTest extends LuceneTestCase {
+public class SegmenterTest extends LuceneTestCase {
- private static Tokenizer tokenizer;
+ private static Segmenter segmenter;
@BeforeClass
public static void setUpBeforeClass() throws Exception {
- tokenizer = Tokenizer.builder().build();
+ segmenter = new Segmenter();
}
@AfterClass
public static void afterClass() throws Exception {
- tokenizer = null;
+ segmenter = null;
}
@Test
@@ -56,7 +54,7 @@ public class TokenizerTest extends Lucen
"ã¹ãã¼ã¹", "ã¹ãã¼ã·ã§ã³", "ã«", "è¡ã", "ã¾ã", "ã",
"ãããããã", "ã"
};
- List<Token> tokens = tokenizer.tokenize(input);
+ List<Token> tokens = segmenter.tokenize(input);
assertTrue(tokens.size() == surfaceForms.length);
for (int i = 0; i < tokens.size(); i++) {
assertEquals(surfaceForms[i], tokens.get(i).getSurfaceFormString());
@@ -65,7 +63,7 @@ public class TokenizerTest extends Lucen
@Test
public void testReadings() {
- List<Token> tokens = tokenizer.tokenize("寿å¸ãé£ã¹ããã§ãã");
+ List<Token> tokens = segmenter.tokenize("寿å¸ãé£ã¹ããã§ãã");
assertTrue(tokens.size() == 6);
assertEquals(tokens.get(0).getReading(), "ã¹ã·");
assertEquals(tokens.get(1).getReading(), "ã¬");
@@ -77,7 +75,7 @@ public class TokenizerTest extends Lucen
@Test
public void testReadings2() {
- List<Token> tokens = tokenizer.tokenize("å¤ãã®å¦çã試é¨ã«è½ã¡ãã");
+ List<Token> tokens = segmenter.tokenize("å¤ãã®å¦çã試é¨ã«è½ã¡ãã");
assertEquals(9, tokens.size());
assertEquals("ãªãªã¯", tokens.get(0).getReading());
assertEquals("ã", tokens.get(1).getReading());
@@ -92,7 +90,7 @@ public class TokenizerTest extends Lucen
@Test
public void testPronunciations() {
- List<Token> tokens = tokenizer.tokenize("寿å¸ãé£ã¹ããã§ãã");
+ List<Token> tokens = segmenter.tokenize("寿å¸ãé£ã¹ããã§ãã");
assertTrue(tokens.size() == 6);
assertEquals("ã¹ã·", tokens.get(0).getPronunciation());
assertEquals("ã¬", tokens.get(1).getPronunciation());
@@ -104,7 +102,7 @@ public class TokenizerTest extends Lucen
@Test
public void testPronunciations2() {
- List<Token> tokens = tokenizer.tokenize("å¤ãã®å¦çã試é¨ã«è½ã¡ãã");
+ List<Token> tokens = segmenter.tokenize("å¤ãã®å¦çã試é¨ã«è½ã¡ãã");
assertEquals(9, tokens.size());
// pronunciation differs from reading here
assertEquals("ãªã¼ã¯", tokens.get(0).getPronunciation());
@@ -120,7 +118,7 @@ public class TokenizerTest extends Lucen
@Test
public void testBasicForms() {
- List<Token> tokens = tokenizer.tokenize("ããã¯ã¾ã å®é¨æ®µéã«ããã¾ãã");
+ List<Token> tokens = segmenter.tokenize("ããã¯ã¾ã å®é¨æ®µéã«ããã¾ãã");
assertEquals(9, tokens.size());
assertNull(tokens.get(0).getBaseForm());
assertNull(tokens.get(1).getBaseForm());
@@ -135,7 +133,7 @@ public class TokenizerTest extends Lucen
@Test
public void testPartOfSpeech() {
- List<Token> tokens = tokenizer.tokenize("ããã¯ã¾ã å®é¨æ®µéã«ããã¾ãã");
+ List<Token> tokens = segmenter.tokenize("ããã¯ã¾ã å®é¨æ®µéã«ããã¾ãã");
assertEquals(9, tokens.size());
assertEquals("åè©,代åè©,ä¸è¬,*", tokens.get(0).getPartOfSpeech());
assertEquals("å©è©,ä¿å©è©,*,*", tokens.get(1).getPartOfSpeech());
@@ -169,7 +167,7 @@ public class TokenizerTest extends Lucen
}
long totalStart = System.currentTimeMillis();
for (int i = 0; i < numIterations; i++){
- tokenizer.tokenize(line);
+ segmenter.tokenize(line);
}
if (VERBOSE) {
System.out.println("Total time : " + (System.currentTimeMillis() - totalStart));
@@ -179,7 +177,7 @@ public class TokenizerTest extends Lucen
totalStart = System.currentTimeMillis();
for (int i = 0; i < numIterations; i++) {
for (String sentence: sentences) {
- tokenizer.tokenize(sentence);
+ segmenter.tokenize(sentence);
}
}
if (VERBOSE) {
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java?rev=1228746&r1=1228745&r2=1228746&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java Sat Jan 7 22:36:34 2012
@@ -5,7 +5,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
-import org.apache.lucene.analysis.kuromoji.Tokenizer.Mode;
import org.apache.lucene.analysis.kuromoji.tokenattributes.PartOfSpeechAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
@@ -15,9 +14,8 @@ import org.apache.lucene.util.Version;
public class SimpleBench {
public static void main(String args[]) throws Exception {
- org.apache.lucene.analysis.kuromoji.Tokenizer tokenizer =
- org.apache.lucene.analysis.kuromoji.Tokenizer.builder().mode(Mode.NORMAL).build();
- Analyzer a = new KuromojiAnalyzer(tokenizer);
+ Segmenter segmenter = new Segmenter();
+ Analyzer a = new KuromojiAnalyzer(segmenter);
Analyzer b = new CJKAnalyzer(Version.LUCENE_CURRENT);
/* slight warmup */
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java?rev=1228746&r1=1228745&r2=1228746&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java Sat Jan 7 22:36:34 2012
@@ -22,7 +22,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.kuromoji.Tokenizer.Mode;
import org.apache.lucene.util._TestUtil;
public class TestKuromojiAnalyzer extends BaseTokenStreamTestCase {
@@ -30,9 +29,8 @@ public class TestKuromojiAnalyzer extend
public void setUp() throws Exception {
super.setUp();
- org.apache.lucene.analysis.kuromoji.Tokenizer tokenizer =
- org.apache.lucene.analysis.kuromoji.Tokenizer.builder().mode(Mode.NORMAL).build();
- analyzer = new KuromojiAnalyzer(tokenizer);
+ final Segmenter segmenter = new Segmenter();
+ analyzer = new KuromojiAnalyzer(segmenter);
}
public void testDecomposition1() throws Exception {
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiBaseFormFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiBaseFormFilter.java?rev=1228746&r1=1228745&r2=1228746&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiBaseFormFilter.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiBaseFormFilter.java Sat Jan 7 22:36:34 2012
@@ -23,20 +23,18 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.kuromoji.Tokenizer.Mode;
public class TestKuromojiBaseFormFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer;
public void setUp() throws Exception {
super.setUp();
- final org.apache.lucene.analysis.kuromoji.Tokenizer t =
- org.apache.lucene.analysis.kuromoji.Tokenizer.builder().mode(Mode.NORMAL).build();
+ final Segmenter segmenter = new Segmenter();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KuromojiTokenizer(t, reader);
+ Tokenizer tokenizer = new KuromojiTokenizer(segmenter, reader);
return new TokenStreamComponents(tokenizer, new KuromojiBaseFormFilter(tokenizer));
}
};
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestQuality.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestQuality.java?rev=1228746&r1=1228745&r2=1228746&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestQuality.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestQuality.java Sat Jan 7 22:36:34 2012
@@ -31,7 +31,6 @@ import java.util.zip.ZipFile;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.kuromoji.Tokenizer.Mode;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
@@ -56,9 +55,8 @@ public class TestQuality extends LuceneT
sentence agreement?: 0.998161495317142
word agreement?: 0.999587584716181
*/
- final org.apache.lucene.analysis.kuromoji.Tokenizer tokenizer =
- org.apache.lucene.analysis.kuromoji.Tokenizer.builder().mode(Mode.NORMAL).build();
- Analyzer testAnalyzer = new KuromojiAnalyzer(tokenizer);
+ final Segmenter segmenter = new Segmenter();
+ Analyzer testAnalyzer = new KuromojiAnalyzer(segmenter);
String line1 = null;
String line2 = null;
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/dict/UserDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/dict/UserDictionaryTest.java?rev=1228746&r1=1228745&r2=1228746&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/dict/UserDictionaryTest.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/dict/UserDictionaryTest.java Sat Jan 7 22:36:34 2012
@@ -23,7 +23,7 @@ import java.io.InputStreamReader;
import java.io.Reader;
import java.io.IOException;
-import org.apache.lucene.analysis.kuromoji.TokenizerTest;
+import org.apache.lucene.analysis.kuromoji.SegmenterTest;
import org.apache.lucene.analysis.kuromoji.dict.UserDictionary;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
@@ -32,7 +32,7 @@ import org.junit.Test;
public class UserDictionaryTest extends LuceneTestCase {
private UserDictionary readDict() throws IOException {
- InputStream is = TokenizerTest.class.getResourceAsStream("userdict.txt");
+ InputStream is = SegmenterTest.class.getResourceAsStream("userdict.txt");
if (is == null)
throw new FileNotFoundException("Cannot find userdict.txt in test classpath!");
try {