You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/01/03 05:55:28 UTC
svn commit: r1226645 -
/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java
Author: rmuir
Date: Tue Jan 3 04:55:28 2012
New Revision: 1226645
URL: http://svn.apache.org/viewvc?rev=1226645&view=rev
Log:
LUCENE-3305: nuke commons-io dependency
Modified:
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java?rev=1226645&r1=1226644&r2=1226645&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java Tue Jan 3 04:55:28 2012
@@ -21,7 +21,6 @@ import java.io.IOException;
import java.io.Reader;
import java.util.List;
-import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -33,7 +32,7 @@ public class KuromojiTokenizer extends T
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final org.apache.lucene.analysis.kuromoji.Tokenizer tokenizer;
- private String str;
+ private final StringBuilder str = new StringBuilder();
private List<Token> tokens;
@@ -44,13 +43,13 @@ public class KuromojiTokenizer extends T
this.tokenizer = tokenizer;
// nocommit: this won't really work for large docs.
// what kind of context does kuromoji need? just sentence maybe?
- str = IOUtils.toString(input);
+ fillBuffer(str, input);
init();
}
private void init() {
tokenIndex = 0;
- tokens = tokenizer.tokenize(str);
+ tokens = tokenizer.tokenize(str.toString());
}
@Override
@@ -80,8 +79,16 @@ public class KuromojiTokenizer extends T
@Override
public void reset(Reader input) throws IOException{
super.reset(input);
- str = IOUtils.toString(input);
+ fillBuffer(str, input);
init();
}
+ final char[] buffer = new char[8192];
+ private void fillBuffer(StringBuilder sb, Reader input) throws IOException {
+ int len;
+ sb.setLength(0);
+ while ((len = input.read(buffer)) > 0) {
+ sb.append(buffer, 0, len);
+ }
+ }
}