You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2016/06/15 21:21:58 UTC

[3/3] lucene-solr:branch_5x: LUCENE-7279: don't throw AIOOBE on some valid inputs

LUCENE-7279: don't throw AIOOBE on some valid inputs


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/bcf1eb7d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/bcf1eb7d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/bcf1eb7d

Branch: refs/heads/branch_5x
Commit: bcf1eb7d24810eae7123c89e079823ce56b9dd25
Parents: 1c88077
Author: Mike McCandless <mi...@apache.org>
Authored: Thu May 12 05:54:22 2016 -0400
Committer: Steve Rowe <sa...@apache.org>
Committed: Wed Jun 15 17:20:50 2016 -0400

----------------------------------------------------------------------
 .../org/apache/lucene/analysis/ja/JapaneseTokenizer.java | 11 +++++------
 .../apache/lucene/analysis/ja/TestJapaneseTokenizer.java |  9 +++++++++
 2 files changed, 14 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bcf1eb7d/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
index b1792fd..5ed78c3 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
@@ -157,9 +157,6 @@ public final class JapaneseTokenizer extends Tokenizer {
   private final boolean extendedMode;
   private final boolean outputCompounds;
 
-  // Index of the last character of unknown word:
-  private int unknownWordEndIndex = -1;
-
   // True once we've hit the EOF from the input reader:
   private boolean end;
 
@@ -273,7 +270,6 @@ public final class JapaneseTokenizer extends Tokenizer {
 
   private void resetState() {
     positions.reset();
-    unknownWordEndIndex = -1;
     pos = 0;
     end = false;
     lastBackTracePos = 0;
@@ -426,7 +422,7 @@ public final class JapaneseTokenizer extends Tokenizer {
       // end of loop), plus bigram cost:
       final int cost = fromPosData.costs[idx] + costs.get(fromPosData.lastRightID[idx], leftID);
       if (VERBOSE) {
-        System.out.println("      fromIDX=" + idx + ": cost=" + cost + " (prevCost=" + fromPosData.costs[idx] + " wordCost=" + wordCost + " bgCost=" + costs.get(fromPosData.lastRightID[idx], leftID) + " leftID=" + leftID);
+        System.out.println("      fromIDX=" + idx + ": cost=" + cost + " (prevCost=" + fromPosData.costs[idx] + " wordCost=" + wordCost + " bgCost=" + costs.get(fromPosData.lastRightID[idx], leftID) + " leftID=" + leftID + ")");
       }
       if (cost < leastCost) {
         leastCost = cost;
@@ -618,6 +614,9 @@ public final class JapaneseTokenizer extends Tokenizer {
       System.out.println("\nPARSE");
     }
 
+    // Index of the last character of unknown word:
+    int unknownWordEndIndex = -1;
+
     // Advances over each position (character):
     while (true) {
 
@@ -728,7 +727,7 @@ public final class JapaneseTokenizer extends Tokenizer {
       }
 
       if (VERBOSE) {
-        System.out.println("\n  extend @ pos=" + pos + " char=" + (char) buffer.get(pos));
+        System.out.println("\n  extend @ pos=" + pos + " char=" + (char) buffer.get(pos) + " hex=" + Integer.toHexString(buffer.get(pos)));
       }
 
       if (VERBOSE) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bcf1eb7d/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
index f6de188..0919f4d 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
@@ -708,4 +708,13 @@ public class TestJapaneseTokenizer
     );
     analyzer.close();
   }
+
+  public void testBigDocument() throws Exception {
+    String doc = "\u5546\u54c1\u306e\u8cfc\u5165\u30fb\u8a73\u7d30(\u30b5\u30a4\u30ba\u3001\u753b\u50cf)\u306f\u5546\u54c1\u540d\u3092\u30af\u30ea\u30c3\u30af\u3057\u3066\u304f\u3060\u3055\u3044\uff01[L.B\u3000CANDY\u3000STOCK]\u30d5\u30e9\u30ef\u30fc\u30d3\u30b8\u30e5\u30fc\u30d9\u30a2\u30c9\u30ec\u30b9[L.B\u3000DAILY\u3000STOCK]\u30dc\u30fc\u30c0\u30fc\u30cb\u30c3\u30c8\u30c8\u30c3\u30d7\u30b9\uff3bL.B\u3000DAILY\u3000STOCK\uff3d\u30dc\u30fc\u30c0\u30fc\u30ed\u30f3\u30b0\u30cb\u30c3\u30c8OP\uff3bL.B\u3000DAILY\u3000STOCK\uff3d\u30ed\u30b4\u30c8\u30fc\u30c8BAG\uff3bL.B\u3000DAILY\u3000STOCK\uff3d\u88cf\u6bdb\u30ed\u30b4\u30d7\u30ea\u30f3\u30c8\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc\u3010TV\u30c9\u30e9\u30de\u7740\u7528\u3011\u30a2\u30f3\u30b4\u30e9\u30ef\u30c3\u30d5\u30eb\u30ab\u30fc\u30c7\u30a3\u30ac\u30f3\u3010TV\u30c9\u30e9\u30de\u7740\u7528\u3011\u30b0\u30e9\u30d5\u30a3\u30c6\u30a3\u30fc\u30d0\u30c3\u30af\u30ea\u30dc\u30f3\u30ef\u30f3\u30d4\u30fc\u30b9\u3010TV\u30c9\u30e9\u30de\u7740\u7528\u3011\u30dc\u30fc\u30c0\u30fc\u30cf\u30a4\u30cd\u30c3\u30af\u30c8\u30c3\u30d7\u30b9\u3010TV\u30c9\u30e9\u30de\u7740\u7528\u3011\u30ec\u30aa\u30d1\u30fc\u30c9\u30df\u30c3\u30c9\u30ab\u30fc\u30d5\u30b9\u30ab\u30fc\u30c8\u3010\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7\u5bfe\u5fdc\u5546\u54c1\u3011\u8d77\u6bdb\u30cb\u30c3\u30c8\u30b9\u30ab\u30fc\u30c8\u3010\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7\u5bfe\u5fdc\u5546\u54c1\u3011\u8d77\u6bdb\u30cb\u30c3\u30c8\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc2way\u30b5\u30f3\u30b0\u30e9\u30b933\u30ca\u30f3\u30d0\u30fc\u30ea\u30f3\u30b0\u30cb\u30c3\u30c83D\u30b7\u30e7\u30eb\u30c0\u30fc\u30d5\u30ec\u30a2\u30fc\u30c9\u30ec\u30b93\u5468\u5e74\u30b9\u30ea\u30c3\u30d13\u5468\u5e74\u30e9\u30b0\u30de\u30c3\u30c83\u5468\u5e74\u30ed\u30c3\u30af\u30b0\u30e9\u30b9\u30ad\u30e3\u30f3\u30c9\u30ebLily\u3000Brown\u30002015\u5e74\u3000\u798f\u888bM
 IX\u30cb\u30c3\u30c8\u30d7\u30eb\u30aa\u30fc\u30d0\u30fcPeckham\u30ed\u30b4\u30cb\u30c3\u30c8\u30a2\u30f3\u30b4\u30e9\u30b8\u30e3\u30ac\u30fc\u30c9\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc\u30a2\u30f3\u30b4\u30e9\u30bf\u30fc\u30c8\u30eb\u30a2\u30f3\u30b4\u30e9\u30c1\u30e5\u30cb\u30c3\u30af\u30a2\u30f3\u30b4\u30e9\u30cb\u30c3\u30c8\u30ab\u30fc\u30c7\u30a3\u30ac\u30f3\u30a2\u30f3\u30b4\u30e9\u30cb\u30c3\u30c8\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc\u30a2\u30f3\u30b4\u30e9\u30d5\u30ec\u30a2\u30ef\u30f3\u30d4\u30fc\u30b9\u30a2\u30f3\u30b4\u30e9\u30ed\u30f3\u30b0\u30ab\u30fc\u30c7\u30a3\u30ac\u30f3\u30a2\u30f3\u30b4\u30e9\u30ef\u30c3\u30d5\u30eb\u30ab\u30fc\u30c7\u30a3\u30ac\u30f3\u30f4\u30a3\u30f3\u30c6\u30fc\u30b8\u30d5\u30a1\u30fc\u4ed8\u30b3\u30fc\u30c8\u30f4\u30a3\u30f3\u30c6\u30fc\u30b8\u30dc\u30fc\u30c0\u30fc\u30cb\u30c3\u30c8\u30f4\u30a3\u30f3\u30c6\u30fc\u30b8\u30ec\u30fc\u30b9\u30cf\u30a4\u30cd\u30c3\u30af\u30c8\u30c3\u30d7\u30b9\u30f4\u30a3\u30f3\u30c6\u30fc\u30b8\u30ec\u30fc\u30b9\u30d6\u30e9\u30a6\u30b9\u30a6\u30a8\u30b9\u30c8\u30b7\u30fc\u30b9\u30eb\u30fc\u30dc\u30fc\u30c0\u30fc\u30ef\u30f3\u30d4\u30fc\u30b9\u30aa\u30fc\u30ac\u30f3\u30b8\u30fc\u30e9\u30a4\u30f3\u30d5\u30ec\u30a2\u30b9\u30ab\u30fc\u30c8\u30aa\u30fc\u30d7\u30f3\u30b7\u30e7\u30eb\u30c0\u30fc\u30cb\u30c3\u30c8\u30c8\u30c3\u30d7\u30b9\u30aa\u30d5\u30b7\u30e7\u30eb\u30b7\u30e3\u30fc\u30ea\u30f3\u30b0\u30ef\u30f3\u30d4\u30fc\u30b9\u30aa\u30d5\u30b7\u30e7\u30eb\u30cb\u30c3\u30c8\u30aa\u30d5\u30b7\u30e7\u30eb\u30cb\u30c3\u30c8\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc\u30aa\u30d5\u30b7\u30e7\u30eb\u30dc\u30fc\u30c0\u30fc\u30ed\u30f3\u30d1\u30fc\u30b9\u30aa\u30d5\u30b7\u30e7\u30eb\u30ef\u30a4\u30c9\u30b3\u30f3\u30d3\u30cd\u30be\u30f3\u30aa\u30eb\u30c6\u30ac\u67c4\u30cb\u30c3\u30c8\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc\u30ab\u30b7\u30e5\u30af\u30fc\u30eb\u30aa\u30d5\u30b7\u30e7\u30eb\u30ef\u30f3\u30d4\u30fc\u30b9\u30ab\u30c3\u30c8\u30a2\u30b7\u30f3\u30e1\u30c8\u30ea\u30fc\u30c9\u30ec\u30b9\u30ab\u30c3
 \u30b5\u30c6\u30f3\u30d7\u30ea\u30fc\u30c4\u30d5\u30ec\u30a2\u30fc\u30b9\u30ab\u30fc\u30c8\u30ab\u30e9\u30fc\u30b9\u30fc\u30d1\u30fc\u30cf\u30a4\u30a6\u30a7\u30b9\u30c8\u30b9\u30ad\u30cb\u30fc\u30ab\u30e9\u30fc\u30d6\u30ed\u30c3\u30af\u30c9\u30ec\u30b9\u30ab\u30e9\u30fc\u30d6\u30ed\u30c3\u30af\u30cb\u30c3\u30c8\u30c1\u30e5\u30cb\u30c3\u30af\u30ae\u30e3\u30b6\u30fc\u30d5\u30ec\u30a2\u30b9\u30ab\u30fc\u30c8\u30ad\u30e9\u30ad\u30e9\u30b9\u30c8\u30e9\u30a4\u30d7\u30bf\u30a4\u30c8\u30b9\u30ab\u30fc\u30c8\u30ad\u30e9\u30ad\u30e9\u30b9\u30c8\u30e9\u30a4\u30d7\u30c9\u30ec\u30b9\u30ad\u30eb\u30c6\u30a3\u30f3\u30b0\u30d5\u30a1\u30fc\u30b3\u30fc\u30c8\u30b0\u30e9\u30c7\u30fc\u30b7\u30e7\u30f3\u30d9\u30a2\u30c9\u30ec\u30b9\u30b0\u30e9\u30c7\u30fc\u30b7\u30e7\u30f3\u30e9\u30a6\u30f3\u30c9\u30b5\u30f3\u30b0\u30e9\u30b9\u30b0\u30e9\u30d5\u30c6\u30a3\u30fc\u30aa\u30d5\u30b7\u30e7\u30eb\u30c8\u30c3\u30d7\u30b9\u30b0\u30e9\u30d5\u30c6\u30a3\u30fc\u30ad\u30e5\u30ed\u30c3\u30c8\u30b0\u30ea\u30c3\u30bf\u30fc\u30ea\u30dc\u30f3\u30d8\u30a2\u30b4\u30e0\u30af\u30ed\u30c3\u30d7\u30c9\u30d6\u30e9\u30a6\u30b9\u30b1\u30fc\u30d6\u30eb\u30cf\u30a4\u30a6\u30a8\u30b9\u30c8\u30b9\u30ab\u30fc\u30c8\u30b3\u30fc\u30c7\u30e5\u30ed\u30a4�\u30b9\u30a8\u30fc\u30c9\u30d1\u30cd\u30eb\u30b9\u30ab\u30fc\u30c8\u30b3\u30fc\u30c7\u30e5\u30ed\u30a4\u30bf\u30a4\u30c8\u30b9\u30ab\u30fc\u30c8\u30b4\u30fc\u30eb\u30c9\u30d0\u30c3\u30af\u30eb\u30d9\u30eb\u30c8\u4ed8\u30b9\u30ab\u30fc\u30c8\u30b4\u30b7\u30c3\u30af\u30d2\u30fc\u30eb\u30b7\u30e7\u30fc\u30c8\u30d6\u30fc\u30c4\u30b4\u30b7\u30c3\u30af\u67c4\u30cb\u30c3\u30c8\u30ef\u30f3\u30d4\u30b3\u30f3\u30d3\u30b9\u30bf\u30b8\u30e3\u30f3\u30b5\u30a4\u30c9\u30b9\u30c6\u30c3\u30c1\u30dc\u30fc\u30a4\u30ba\u30c7\u30cb\u30e0\u30d1\u30f3\u30c4\u30b5\u30b9\u30da\u3064\u304d\u30b7\u30e7\u30fc\u30c8\u30d1\u30f3\u30c4\u30b5\u30b9\u30da\u30f3\u30c0\u30fc\u4ed8\u30d7\u30ea\u30fc\u30c4\u30ed\u30f3\u30b0\u30b9\u30ab\u30fc\u30c8\u30b7\u30e3\u30fc\u30ea\u30f3\u30b0\u30bf\u30a4
 \u30c8\u30b9\u30ab\u30fc\u30c8\u30b8\u30e3\u30ac\u30fc\u30c9\u30bf\u30c3\u30af\u30ef\u30f3\u30d4\u30fc\u30b9\u30b9\u30a8\u30fc\u30c9\u30d5\u30ea\u30eb\u30d5\u30e9\u30ef\u30fc\u30d1\u30f3\u30c4\u30b9\u30a8\u30fc\u30c9\u88cf\u6bdb\u80a9\u7a7a\u304d\u30c8\u30c3\u30d7\u30b9\u30b9\u30af\u30a8\u30a2\u30b7\u30e7\u30eb\u30c0\u30fcBAG\u30b9\u30af\u30a8\u30a2\u30d0\u30c3\u30af\u30eb\u30b7\u30e7\u30eb\u30c0\u30fc\u30b9\u30af\u30a8\u30a2\u30df\u30cb\u30d0\u30c3\u30b0\u30b9\u30c8\u30fc\u30f3\u30d3\u30fc\u30c1\u30b5\u30f3\u30c0\u30eb\u30b9\u30c8\u30e9\u30a4\u30d7\u30b5\u30b9\u30da\u4ed8\u304d\u30b9\u30ad\u30cb\u30fc\u30b9\u30c8\u30e9\u30a4\u30d7\u30d0\u30c3\u30af\u30b9\u30ea\u30c3\u30c8\u30b7\u30e3\u30c4\u30b9\u30e9\u30a4\u30d0\u30fc\u30b7\u30e3\u30ae\u30fc\u30b3\u30fc\u30c8\u30bf\u30fc\u30c8\u30eb�\u30ec\u30fc\u30b9\u30bf\u30a4\u30c8\u30b9\u30ab\u30fc\u30c8\u30bf\u30fc\u30c8\u30eb\u30cb\u30c3\u30c8\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc\u30bf\u30a4\u30c8\u30b8\u30e3\u30f3\u30d1\u30fc\u30b9\u30ab\u30fc\u30c8\u30c0\u30d6\u30eb\u30af\u30ed\u30b9\u30c1\u30e5\u30fc\u30eb\u30d5\u30ec\u30a2\u30b9\u30ab\u30fc\u30c8\u30c0\u30d6\u30eb\u30b9\u30c8\u30e9\u30c3\u30d7\u30d1\u30f3\u30d7\u30b9\u30c0\u30d6\u30eb\u30cf\u30fc\u30c8\u30ea\u30f3\u30b0\u30c0\u30d6\u30eb\u30d5\u30a7\u30a4\u30b9\u30c1\u30a7\u30c3\u30af\u30b9\u30c8\u30fc\u30eb\u30c1\u30a7\u30fc\u30f3\u30b3\u30f3\u30d3\u30d3\u30b8\u30e5\u30fc\u30cd\u30c3\u30af\u30ec\u30b9\u30c1\u30a7\u30fc\u30f3\u30b3\u30f3\u30d3\u30d3\u30b8\u30e5\u30fc\u30d4\u30a2\u30b9\u30c1\u30a7\u30fc\u30f3\u30b3\u30f3\u30d3\u30d3\u30b8\u30e5\u30fc\u30d6\u30ec\u30b9\u30c1\u30a7\u30fc\u30f3\u30c4\u30d0\u5e83HAT\u30c1\u30a7\u30fc\u30f3\u30d3\u30b8\u30e5\u30fc\u30d4\u30a2\u30b9\u30c1\u30a7\u30c3\u30af\u30cb\u30c3\u30c8\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc\u30c1\u30a7\u30c3\u30af\u30cd\u30eb\u30df\u30c7\u30a3\u30a2\u30e0\u30b9\u30ab\u30fc\u30c8\u30c1\u30a7\u30c3\u30af\u67c4\u30b9\u30ad\u30cb\u30fc\u30d1\u30f3\u30c4\u30c1\u30e5\u30fc\u30eb\u30b3\u30f3\u30d3
 \u30b7\u30e1\u30c8\u30c3\u30d7\u30b9\u30c7\u30cb\u30e0\u30d5\u30ec\u30a2\u30fc\u30b9\u30ab\u30fc\u30c8\u30c9\u30c3\u30c8\u30aa\u30d5\u30b7\u30e7\u30eb\u30d5\u30ea\u30eb\u30d6\u30e9\u30a6\u30b9\u30c9\u30c3\u30c8\u30b8\u30e3\u30ac\u30fc\u30c9\u30c9\u30ec\u30b9\u30c9\u30c3\u30c8\u30cb\u30c3\u30c8\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc\u30c9\u30c3\u30c8\u30ec\u30fc\u30b9\u30c8\u30c3\u30d7\u30b9\u30cb\u30c3\u30c8�\u30aa\u30fc\u30ac\u30f3\u30b8\u30fc\u30b9\u30ab\u30fc\u30c8\u30bb\u30c3\u30c8\u30cb\u30c3\u30c8\u30ad\u30e3\u30df\u30bd\u30fc\u30eb\u30ef\u30f3\u30d4\u30fc\u30b9\u30cb\u30c3\u30c8\u30b9\u30cc\u30fc\u30c9\u30d1\u30fc\u30eb\u30b3\u30f3\u30d3\u30d5\u30fc\u30d7\u30d4\u30a2\u30b9\u30cf\u30a4\u30a6\u30a8\u30b9\u30c8\u30b7\u30e7\u30fc\u30c8\u30c7\u30cb\u30e0\u30cf\u30a4\u30a6\u30a8\u30b9\u30c8\u30bf\u30a4\u30c8\u30b9\u30ab\u30fc\u30c8\u30cf\u30a4\u30a6\u30a8\u30b9\u30c8\u30c7\u30cb\u30e0\u30b7\u30e7\u30fc\u30c8\u30d1\u30f3\u30c4\u30cf\u30a4\u30a6\u30a8\u30b9\u30c8\u30d7\u30ea\u30fc\u30c4\u30b9\u30ab\u30fc\u30c8\u30cf\u30a4\u30a6\u30a8\u30b9\u30c8\u30df\u30c3\u30c9\u30ab\u30fc\u30d5\u30b9\u30ab\u30fc\u30c8\u30cf\u30a4\u30b2\u30fc\u30b8\u30bf\u30fc\u30c8\u30eb\u30cb\u30c3\u30c8\u30cf\u30a4\u30b2\u30fc\u30b8\u30e9\u30a4\u30f3\u30cb\u30c3\u30c8\u30cf\u30a4\u30cd\u30c3\u30af\u5207\u308a\u66ff\u3048\u30b9\u30a6\u30a7\u30c3\u30c8\u30d0\u30bf\u30d5\u30e9\u30a4\u30cd\u30c3\u30af\u30ec\u30b9\u30d0\u30bf\u30d5\u30e9\u30a4\u30df\u30cb\u30d4\u30a2\u30b9\u30d0\u30bf\u30d5\u30e9\u30a4\u30ea\u30f3\u30b0\u30d0\u30c3\u30af\u30bf\u30f3\u30af\u30ea\u30d6\u30ef\u30f3\u30d4\u30fc\u30b9\u30d0\u30c3\u30af\u30ea\u30dc\u30f3\u30b9\u30ad\u30cb\u30fc\u30c7\u30cb\u30e0\u30d1\u30f3\u30c4\u30d0\u30c3\u30af\u30ea\u30dc\u30f3\u6df1V\u30ef\u30f3\u30d4\u30fc\u30b9\u30d3\u30b8\u30e5\u30fc\u30b9\u30c8\u30e9\u30c3\u30d7\u30b5\u30f3\u30c0\u30eb\u30d3\u30b9\u30c1\u30a7\u30b3\u30f3\u30d3\u30aa\u30d5\u30b7\u30e7\u30eb\u30d6\u30e9\u30a6\u30b9\u30d6\u30fc\u30af\u30ec\u30b8\u30e3\u30ac\u30fc\u30c9\u30cb
 \u30c3\u30c8\u30d5\u30a7\u30a4\u30af\u30e0\u30fc\u30c8\u30f3\u30b7\u30e7\u30fc\u30c8\u30b3\u30fc\u30c8\u30d5\u30a7\u30ec\u30c3\u30c8\u30ab\u30fc\u30c7\u30a3\u30ac\u30f3\u30d5\u30a7\u30ec\u30c3\u30c8\u30d3\u30c3\u30af\u30bf\u30fc\u30c8\u30eb\u30cb\u30c3\u30c8\u30d6\u30e9\u30a6\u30b8\u30f3\u30b0\u30af\u30eb\u30fc\u30d6\u30e9\u30a6\u30b9\u30d7\u30ea\u30fc\u30c4\u30d6\u30e9\u30a6\u30b9\u30d5\u30ea\u30eb\u30cb\u30c3\u30c8\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc\u30d5\u30ea\u30f3\u30b8\u30cb\u30c3\u30c8\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc\u30d5\u30ec\u30a2\u30cb\u30c3\u30c8\u30b9\u30ab\u30fc\u30c8\u30d6\u30ed\u30a6\u578b\u30b5\u30f3\u30b0\u30e9\u30b9\u30d9\u30fc\u30b7\u30c3\u30af\u30d5\u30a7\u30ec\u30c3\u30c8\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc\u30d9\u30eb\u30c8\u4ed8\u30ac\u30a6\u30c1\u30e7\u30d1\u30f3\u30c4\u30d9\u30eb\u30c8\u4ed8\u30b7\u30e7\u30fc\u30c8\u30d1\u30f3\u30c4\u30d9\u30eb\u30c8\u4ed8\u30bf\u30c3\u30af\u30b9\u30ab\u30fc\u30c8\u30d9\u30eb\u30c8\u4ed8\u30bf\u30c3\u30af\u30d1\u30f3\u30c4\u30d9\u30eb\u30d9\u30c3\u30c8\u30a4\u30f3\u30d2\u30fc\u30eb\u30d1\u30f3\u30d7\u30b9\u30d9\u30ed\u30a2\u30a6\u30a7\u30c3\u30b8\u30d1\u30f3\u30d7\u30b9\u30d9\u30ed\u30a2\u30df\u30c3\u30c9\u30ab\u30fc\u30d5\u30ef\u30f3\u30d4\u30fc\u30b9\u30d9\u30ed\u30a2\u30ef\u30f3\u30d4\u30fc\u30b9\u30d9\u30ed\u30a2\u98a8\u30cb\u30c3\u30c8\u30ab\u30fc\u30c7\u30a3\u30ac\u30f3\u30dc\u30a2\u4ed8\u30b3\u30fc\u30c8\u30dc\u30fc\u30c0\u30fcV\u30cd\u30c3\u30afT\u30b7\u30e3\u30c4\u30dc\u30fc\u30c0\u30fc\u30aa\u30d5\u30b7\u30e7\u30eb\u30ab\u30c3\u30c8\u30bd\u30fc\u30dc\u30fc\u30c0\u30fc\u30ab\u30c3\u30c8\u30bd\u30fc\u30ef\u30f3\u30d4\u30fc\u30b9\u30dc\u30fc\u30c0\u30fc\u30bf\u30a4\u30c8\u30ab\u30c3\u30c8\u30bd\u30fc\u30dc\u30fc\u30c0\u30fc\u30c8\u30c3\u30d7\u30b9\u30dc\u30fc\u30c0\u30fc\u30c8\u30c3\u30d7\u30b9�\u30b9\u30ab\u30fc\u30c8\u30bb\u30c3\u30c8\u30dc\u30b9\u30c8\u30f3\u30e1\u30ac\u30cd\u30de\u30aa\u30ab\u30e9\u30fc\u30b7\u30e3\u30c4\u30cb\u30c3\u30c8\u30bb\u30c3\u30c8\u30df\u30c3\u30af\u30b9
 \u30cb\u30c3\u30c8\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc\u30df\u30c3\u30c9\u30ab\u30fc\u30d5\u4e08\u30dd\u30f3\u30c1\u30b9\u30ab\u30fc\u30c8\u30df\u30ea\u30bf\u30ea\u30fc\u30ae\u30e3\u30b6\u30fc\u30b7\u30e7\u30fc\u30c8\u30d1\u30f3\u30c4\u30e1\u30c3\u30b7\u30e5\u30cf\u30a4\u30cd\u30c3\u30af\u30c8\u30c3\u30d7\u30b9\u30e1\u30eb\u30c8\u30f3P\u30b3\u30fc\u30c8\u30e1\u30eb\u30c8\u30f3\u30c0\u30c3\u30d5\u30eb\u30b3\u30fc\u30c8\u30e1\u30eb\u30c8\u30f3\u30c0\u30d6\u30eb\u30b3\u30fc\u30c8\u30e2\u30d8\u30a2\u30cb\u30c3\u30c8\u30ab\u30fc\u30c7\u30a3\u30ac\u30f3\u30e2\u30d8\u30a2\u30cb\u30c3\u30c8\u30bf\u30fc\u30c8\u30eb\u30e6\u30ea\u67c4\u30d7\u30ea\u30fc\u30c4\u30d5\u30ec\u30a2\u30fc\u30b9\u30ab\u30fc\u30c8\u30e9\u30a4\u30c0\u30fc\u30b9\u30c7\u30cb\u30e0\u30b8\u30e3\u30b1\u30c3\u30c8\u30e9\u30a4\u30ca\u30fc\u4ed8\u30c1\u30a7\u30b9\u30bf\u30fc\u30b3\u30fc\u30c8\u30e9\u30c3\u30d5\u30eb\u30d7\u30ea\u30fc\u30c4\u30d6\u30e9\u30a6\u30b9\u30e9\u30e1\u30b8\u30e3\u30ac\u30fc\u30c9\u30cf\u30a4\u30b2\u30fc\u30b8\u30cb\u30c3\u30c8\u30ea\u30d6\u30cb\u30c3\u30c8\u30ef\u30f3\u30d4\u30ea\u30dc\u30f3�\u30d1\u30fc\u30eb\u30d0\u30ec\u30c3\u30bf\u30ea\u30dc\u30f3\u30d0\u30ec\u30c3\u30bf\u30ea\u30dc\u30f3\u30d9\u30eb\u30c8\u30cf\u30a4\u30a6\u30a8\u30b9\u30c8\u30d1\u30f3\u30c4\u30ea\u30ea\u30fc\u523a\u7e4d\u958b\u895f\u30d6\u30e9\u30a6\u30b9\u30ec\u30fc\u30b9\u30d3\u30b9\u30c1\u30a7\u30ed\u30fc\u30d5\u30a1\u30fc\u30b5\u30dc\u30ed\u30b4\u30cb\u30c3\u30c8\u30ad\u30e3\u30c3\u30d7\u30ed\u30b4\u523a\u7e4d\u30cb\u30c3\u30c8\u30ef\u30c3\u30c1\u30ed\u30f3\u30b0\u30cb\u30c3\u30c8\u30ac\u30a6\u30f3\u30ef\u30c3\u30d5\u30eb\u30a2\u30f3\u30b4\u30e9\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc\u30ef\u30f3\u30b7\u30e7\u30eb\u30c0\u30ef\u30fc\u30ef\u30f3\u30d4\u30fc\u30b9\u5149\u6ca2\u30e9\u30e1\u30cb\u30c3\u30c8\u30ab\u30fc\u30c7\u30a3\u30ac\u30f3\u523a\u7e4d\u30b7\u30d5\u30a9\u30f3\u30d6\u30e9\u30a6\u30b9\u53f0\u5f62\u30df\u30cb\u30b9\u30ab\u30fc\u30c8\u914d\u8272\u30cb\u30c3\u30c8\u30d7\u30eb\u30aa\u30fc\u30d0
 \u88cf\u6bdb\u30d7\u30eb\u30aa\u30fc\u30d0\u30fc\u00d7\u30aa\u30fc\u30ac\u30f3\u30b8\u30fc\u30b9\u30ab\u30fc\u30c8\u30bb\u30c3\u30c8";
+
+    JapaneseTokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), readDict(), false, Mode.NORMAL);
+    tokenizer.setReader(new StringReader(doc));
+    tokenizer.reset();
+    while (tokenizer.incrementToken());
+  }
 }