You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/01/03 05:55:28 UTC

svn commit: r1226645 - /lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java

Author: rmuir
Date: Tue Jan  3 04:55:28 2012
New Revision: 1226645

URL: http://svn.apache.org/viewvc?rev=1226645&view=rev
Log:
LUCENE-3305: nuke commons-io dependency

Modified:
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java?rev=1226645&r1=1226644&r2=1226645&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java Tue Jan  3 04:55:28 2012
@@ -21,7 +21,6 @@ import java.io.IOException;
 import java.io.Reader;
 import java.util.List;
 
-import org.apache.commons.io.IOUtils;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -33,7 +32,7 @@ public class KuromojiTokenizer extends T
   private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
   private final org.apache.lucene.analysis.kuromoji.Tokenizer tokenizer;
   
-  private String str;
+  private final StringBuilder str = new StringBuilder();
   
   private List<Token> tokens;
   
@@ -44,13 +43,13 @@ public class KuromojiTokenizer extends T
     this.tokenizer = tokenizer;
     // nocommit: this won't really work for large docs.
     // what kind of context does kuromoji need? just sentence maybe?
-    str = IOUtils.toString(input);
+    fillBuffer(str, input);
     init();
   }
   
   private void init() {
     tokenIndex = 0;
-    tokens = tokenizer.tokenize(str);
+    tokens = tokenizer.tokenize(str.toString());
   }
   
   @Override
@@ -80,8 +79,16 @@ public class KuromojiTokenizer extends T
   @Override
   public void reset(Reader input) throws IOException{
     super.reset(input);
-    str = IOUtils.toString(input);
+    fillBuffer(str, input);
     init();
   }
   
+  final char[] buffer = new char[8192]; // scratch space reused by every fillBuffer call
+  private void fillBuffer(StringBuilder sb, Reader input) throws IOException { // replaces sb's contents with all of input
+    int len;
+    sb.setLength(0); // discard text left over from the previous reader
+    while ((len = input.read(buffer)) != -1) { // -1 is the only EOF signal; a 0-length read must not end the copy early
+      sb.append(buffer, 0, len);
+    }
+  }
 }