You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/07/23 17:36:26 UTC

[01/13] lucenenet git commit: Ported Lucene.Net.Analysis.Kuromoji + tests

Repository: lucenenet
Updated Branches:
  refs/heads/master e67244aa2 -> 2d5108ba0


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/project.json b/src/Lucene.Net.Tests.Analysis.Kuromoji/project.json
new file mode 100644
index 0000000..5badefa
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/project.json
@@ -0,0 +1,43 @@
+{
+  "version": "4.8.0",
+  "title": "Lucene.Net.Tests.Analysis.Kuromoji",
+  "buildOptions": {
+    "compile": {
+      "includeFiles": [ "../CommonAssemblyInfo.cs" ]
+    },
+    "embed": {
+      "includeFiles": [
+        "bocchan.utf-8",
+        "search-segmentation-tests.txt",
+        "userdict.txt"
+      ]
+    }
+  },
+  "dependencies": {
+    "dotnet-test-nunit-teamcity": "3.4.0-beta-3",
+    "Lucene.Net.Analysis.Kuromoji": "4.8.0",
+    "Lucene.Net.TestFramework": "4.8.0",
+    "NUnit": "3.5.0"
+  },
+  "testRunner": "nunit-teamcity",
+  "frameworks": {
+    "netcoreapp1.0": {
+      "imports": "dnxcore50",
+      "buildOptions": {
+        "debugType": "portable",
+        "define": [ "NETSTANDARD" ]
+      }
+    },
+    "net451": {
+      "buildOptions": {
+        "debugType": "full",
+        "define": [ "FEATURE_SERIALIZABLE" ]
+      }
+    }
+  },
+
+  "runtimes": {
+    "win7-x86": {},
+    "win7-x64": {}
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/search-segmentation-tests.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/search-segmentation-tests.txt b/src/Lucene.Net.Tests.Analysis.Kuromoji/search-segmentation-tests.txt
new file mode 100644
index 0000000..835446f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/search-segmentation-tests.txt
@@ -0,0 +1,142 @@
+###
+### Tests for Kuromoji's search mode heuristic
+###
+### In search-mode, Kuromoji uses a heuristic to do extra splitting of words
+### to get a decompounding effect useful for search.  This file includes tests
+### for this heuristic and demonstrates its usefulness, but also weaknesses.
+###
+### This file's format is as follows:
+###	  <text><tab><token1> <token2> ... <token>
+###
+### This file should use UTF-8 encoding and there is one test per line.  The
+### text to be segmented and its expected surface form token sequence are
+### separated by a tab ('\t').  Tokens are separated by a half-width space.
+### Whitespace lines and lines starting with a '#' are ignored.  Comments
+### are not allowed on entry lines.
+###
+### NOTE: These tests depend on IPADIC
+###
+### Revision history:
+###  - 2012-01-29: Initial version
+###
+
+##
+## Organizations
+##
+
+# Kansai International Airport
+関西国際空港	関西 関西国際空港/0 国際 空港
+# Narita Airport
+成田空港	成田 成田空港/0 空港
+# Haneda Airport
+羽田空港	羽田 羽田空港/0 空港
+# Nara Institute of Science and Technology
+奈良先端科学技術大学院大学	奈良 奈良先端科学技術大学院大学/0 先端 科学 技術 大学院 大学
+# Tokyo University
+東京大学	東京 東京大学/0 大学
+# Kyoto University
+京都大学	京都 京都大学/0 大学
+
+# NOTE: differs from non-compound mode:
+# Kyoto University Baseball Club
+京都大学硬式野球部	京都大 学 硬式 野球 部
+
+##
+## Katakana titles
+##
+
+# Senior Software Engineer
+シニアソフトウェアエンジニア	シニア シニアソフトウェアエンジニア/0 ソフトウェア エンジニア
+# Software Engineer
+ソフトウェアエンジニア	ソフトウェア エンジニア
+# Senior Project Manager
+シニアプロジェクトマネジャー	シニア シニアプロジェクトマネジャー/0 プロジェクト マネジャー
+# Project Manager
+プロジェクトマネジャー	プロジェクト マネジャー
+# Senior Sales Engineer
+シニアセールスエンジニア	シニア シニアセールスエンジニア/0 セールス エンジニア
+# System Architect
+システムアーキテクト	システム システムアーキテクト/0 アーキテクト
+# Senior System Architect
+シニアシステムアーキテクト	シニア シニアシステムアーキテクト/0 システム アーキテクト
+# System Administrator
+システムアドミニストレータ	システム アドミニストレータ
+システムアドミニストレーター	システム システムアドミニストレーター/0 アドミニストレーター
+# Senior System Administrator
+シニアシステムアドミニストレーター	シニア シニアシステムアドミニストレーター/0 システム アドミニストレーター
+
+##
+## Company names (several are fictitious)
+##
+
+# SoftBank Mobile
+ソフトバンクモバイル	ソフトバンク モバイル
+# Alpine Materials
+アルパインマテリアルズ	アルパイン アルパインマテリアルズ/0 マテリアルズ
+# Sapporo Holdings
+サッポロホールディングス	サッポロ ホールディングス
+# Yamada Corporation
+ヤマダコーポレーション	ヤマダ ヤマダコーポレーション/0 コーポレーション
+# Canon Semiconductor equipment	NOTE: Semiconductor becomes semi + conductor
+キヤノンセミコンダクターエクィップメント	キヤノン キヤノンセミコンダクターエクィップメント/0 セミ コンダクター エクィップメント
+# Oriental Chain
+オリエンタルチエン	オリエンタル オリエンタルチエン/0 チエン
+# Ally Projects Japan	NOTE: Becomes one token as プロジェクツ is not in IPADIC
+アーリープロジェクツジャパン	アーリープロジェクツジャパン
+# Peter Pan Corporation
+ピーターパンコーポレーション	ピーター ピーターパンコーポレーション/0 パン コーポレーション
+# AIM Create
+エイムクリエイツ	エイムクリエイツ
+# Mars Engineering
+マースエンジニアリング	マース マースエンジニアリング/0 エンジニアリング
+# Fuji Protein Technology
+フジプロテインテクノロジー	フジ フジプロテインテクノロジー/0 プロテイン テクノロジー
+
+##
+## Person names
+##
+
+# Michael Jackson
+マイケルジャクソン	マイケル ジャクソン
+# Steve Jobs
+スティーブジョブズ	スティーブ ジョブズ
+# Harry Potter	NOTE: Becomes one token (short word)
+ハリーポッター	ハリーポッター
+# Bill Gates	NOTE: Becomes one token (short word)
+ビルゲイツ	ビルゲイツ
+# Sean Connery	NOTE: Becomes one token (okay)
+ショーンコネリー	ショーンコネリー
+
+##
+## Other nouns
+##
+
+# Holdings
+ホールディングス	ホールディングス
+# Engineering
+エンジニアリング	エンジニアリング
+# Software Engineering
+ソフトウェアエンジニアリング	ソフトウェア エンジニアリング
+# Shopping center
+ショッピングセンター	ショッピング センター
+# Game center (arcade)	NOTE: One token because of short word
+ゲームセンター	ゲームセンター
+# Christmas shopping
+クリスマスショッピング	クリスマス ショッピング
+# Download file
+ダウンロードファイル	ダウンロード ファイル
+# Technology
+テクノロジー	テクノロジー
+# Lillehammer Olympics
+リレハンメルオリンピック	リレハンメル オリンピック
+
+##
+## Problematic terms
+##
+
+# JT Engineering	NOTE: Becomes J Tien ginia ring (substrings are in IPADIC)
+ジェイティエンジニアリング	ジェイ ジェイティエンジニアリング/0 ティエン ジニア リング
+# Anchovy pasta	NOTE: Becomes Anch yvipasta
+アンチョビパスタ	アンチ アンチョビパスタ/0 ョビパスタ
+# Surprise gift	NOTE: Becomes one token (surprise not in IPADIC)
+サプライズギフト	サプライズギフト

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/userdict.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/userdict.txt b/src/Lucene.Net.Tests.Analysis.Kuromoji/userdict.txt
new file mode 100644
index 0000000..f9db02c
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/userdict.txt
@@ -0,0 +1,10 @@
+# Custom segmentation for long entries
+日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
+関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,テスト名詞
+
+# Custom reading for sumo wrestler
+朝青龍,朝青龍,アサショウリュウ,カスタム人名
+
+# Silly entry:
+abcd,a b cd,foo1 foo2 foo3,bar
+abcdefg,ab cd efg,foo1 foo2 foo4,bar

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net/Support/Collections.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net/Support/Collections.cs b/src/Lucene.Net/Support/Collections.cs
index dcafc25..3ded8e3 100644
--- a/src/Lucene.Net/Support/Collections.cs
+++ b/src/Lucene.Net/Support/Collections.cs
@@ -54,6 +54,15 @@ namespace Lucene.Net.Support
             return new SetFromMap<T>(map);
         }
 
+        public static void Reverse<T>(IList<T> list)
+        {
+            // Swap mirrored pairs while walking inward from both ends; the middle element of an odd-sized list stays put.
+            for (int lo = 0, hi = list.Count - 1; lo < hi; lo++, hi--)
+            {
+                Swap(list, lo, hi);
+            }
+        }
+
         public static IComparer<T> ReverseOrder<T>()
         {
             return (IComparer<T>)ReverseComparer<T>.REVERSE_ORDER;

[10/13] lucenenet git commit: Ported Lucene.Net.Analysis.Kuromoji + tests

Posted by ni...@apache.org.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/JapaneseBaseFormFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseBaseFormFilterFactory.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseBaseFormFilterFactory.cs
new file mode 100644
index 0000000..5524be7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseBaseFormFilterFactory.cs
@@ -0,0 +1,52 @@
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="JapaneseBaseFormFilter"/>.
+    /// <code>
+    /// &lt;fieldType name="text_ja" class="solr.TextField"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.JapaneseTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.JapaneseBaseFormFilterFactory"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    public class JapaneseBaseFormFilterFactory : TokenFilterFactory
+    {
+        /// <summary>Creates a new <see cref="JapaneseBaseFormFilterFactory"/>; any entry still in <paramref name="args"/> after the base constructor runs is rejected with an <see cref="ArgumentException"/>.</summary>
+        public JapaneseBaseFormFilterFactory(IDictionary<string, string> args) : base(args)
+        {
+            if (args.Count > 0)
+            {
+                throw new ArgumentException("Unknown parameters: " + string.Join(", ", args)); // list the entries; a plain Dictionary's ToString() is only its type name
+            }
+        }
+
+        /// <summary>Wraps <paramref name="input"/> in a new <see cref="JapaneseBaseFormFilter"/>.</summary>
+        public override TokenStream Create(TokenStream input)
+        {
+            return new JapaneseBaseFormFilter(input);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/JapaneseIterationMarkCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseIterationMarkCharFilter.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseIterationMarkCharFilter.cs
new file mode 100644
index 0000000..71566bb
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseIterationMarkCharFilter.cs
@@ -0,0 +1,500 @@
+using Lucene.Net.Analysis.Util;
+using System.Diagnostics;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Normalizes Japanese horizontal iteration marks (odoriji) to their expanded form.
+    /// </summary>
+    /// <remarks>
+    /// Sequences of iteration marks are supported.  In case an illegal sequence of iteration
+    /// marks is encountered, the implementation emits the illegal source character as-is
+    /// without considering its script.  For example, with input "&#x003f;&#x309d;", we get
+    /// "&#x003f;&#x003f;" even though "&#x003f;" isn't hiragana.
+    /// <para/>
+    /// Note that a full stop punctuation character "&#x3002;" (U+3002) can not be iterated
+    /// (see below). Iteration marks themselves can be emitted in case they are illegal,
+    /// i.e. if they go back past the beginning of the character stream.
+    /// <para/>
+    /// The implementation buffers input until a full stop punctuation character (U+3002)
+    /// or EOF is reached in order to not keep a copy of the character stream in memory.
+    /// Vertical iteration marks, which are even rarer than horizontal iteration marks in
+    /// contemporary Japanese, are unsupported.
+    /// </remarks>
+    public class JapaneseIterationMarkCharFilter : CharFilter
+    {
+        /// <summary>Normalize kanji iteration marks by default</summary>
+        public static readonly bool NORMALIZE_KANJI_DEFAULT = true;
+
+        /// <summary>Normalize kana iteration marks by default</summary>
+        public static readonly bool NORMALIZE_KANA_DEFAULT = true;
+
+        private const char KANJI_ITERATION_MARK = '\u3005';           // 々
+
+        private const char HIRAGANA_ITERATION_MARK = '\u309d';        // ゝ
+
+        private const char HIRAGANA_VOICED_ITERATION_MARK = '\u309e'; // ゞ
+
+        private const char KATAKANA_ITERATION_MARK = '\u30fd';        // ヽ
+
+        private const char KATAKANA_VOICED_ITERATION_MARK = '\u30fe'; // ヾ
+
+        private const char FULL_STOP_PUNCTUATION = '\u3002';           // 。
+
+        // Hiragana to dakuten map (lookup using code point - 0x304b（か）)
+        private static char[] h2d = new char[50];
+
+        // Katakana to dakuten map (lookup using code point - 0x30ab（カ）)
+        private static char[] k2d = new char[50];
+
+        private readonly RollingCharBuffer buffer = new RollingCharBuffer();
+
+        private int bufferPosition = 0;
+
+        private int iterationMarksSpanSize = 0;
+
+        private int iterationMarkSpanEndPosition = 0;
+
+        private bool normalizeKanji;
+
+        private bool normalizeKana;
+
+        static JapaneseIterationMarkCharFilter()
+        {
+            // Hiragana dakuten map: entry i belongs to code point 0x304b (か) + i and holds its voiced form, or the character itself if it has none
+            h2d[0] = '\u304c';  // か => が
+            h2d[1] = '\u304c';  // が => が
+            h2d[2] = '\u304e';  // き => ぎ
+            h2d[3] = '\u304e';  // ぎ => ぎ
+            h2d[4] = '\u3050';  // く => ぐ
+            h2d[5] = '\u3050';  // ぐ => ぐ
+            h2d[6] = '\u3052';  // け => げ
+            h2d[7] = '\u3052';  // げ => げ
+            h2d[8] = '\u3054';  // こ => ご
+            h2d[9] = '\u3054';  // ご => ご
+            h2d[10] = '\u3056'; // さ => ざ
+            h2d[11] = '\u3056'; // ざ => ざ
+            h2d[12] = '\u3058'; // し => じ
+            h2d[13] = '\u3058'; // じ => じ
+            h2d[14] = '\u305a'; // す => ず
+            h2d[15] = '\u305a'; // ず => ず
+            h2d[16] = '\u305c'; // せ => ぜ
+            h2d[17] = '\u305c'; // ぜ => ぜ
+            h2d[18] = '\u305e'; // そ => ぞ
+            h2d[19] = '\u305e'; // ぞ => ぞ
+            h2d[20] = '\u3060'; // た => だ
+            h2d[21] = '\u3060'; // だ => だ
+            h2d[22] = '\u3062'; // ち => ぢ
+            h2d[23] = '\u3062'; // ぢ => ぢ
+            h2d[24] = '\u3063'; // っ => っ (maps to itself)
+            h2d[25] = '\u3065'; // つ => づ
+            h2d[26] = '\u3065'; // づ => づ
+            h2d[27] = '\u3067'; // て => で
+            h2d[28] = '\u3067'; // で => で
+            h2d[29] = '\u3069'; // と => ど
+            h2d[30] = '\u3069'; // ど => ど
+            h2d[31] = '\u306a'; // な => な (maps to itself)
+            h2d[32] = '\u306b'; // に => に (maps to itself)
+            h2d[33] = '\u306c'; // ぬ => ぬ (maps to itself)
+            h2d[34] = '\u306d'; // ね => ね (maps to itself)
+            h2d[35] = '\u306e'; // の => の (maps to itself)
+            h2d[36] = '\u3070'; // は => ば
+            h2d[37] = '\u3070'; // ば => ば
+            h2d[38] = '\u3071'; // ぱ => ぱ (maps to itself)
+            h2d[39] = '\u3073'; // ひ => び
+            h2d[40] = '\u3073'; // び => び
+            h2d[41] = '\u3074'; // ぴ => ぴ (maps to itself)
+            h2d[42] = '\u3076'; // ふ => ぶ
+            h2d[43] = '\u3076'; // ぶ => ぶ
+            h2d[44] = '\u3077'; // ぷ => ぷ (maps to itself)
+            h2d[45] = '\u3079'; // へ => べ
+            h2d[46] = '\u3079'; // べ => べ
+            h2d[47] = '\u307a'; // ぺ => ぺ (maps to itself)
+            h2d[48] = '\u307c'; // ほ => ぼ
+            h2d[49] = '\u307c'; // ぼ => ぼ
+
+            // Make katakana dakuten map from hiragana map by shifting every entry by the constant カ - か code point distance
+            char codePointDifference = (char)('\u30ab' - '\u304b'); // カ - か
+            Debug.Assert(h2d.Length == k2d.Length);
+            for (int i = 0; i < k2d.Length; i++)
+            {
+                k2d[i] = (char)(h2d[i] + codePointDifference);
+            }
+        }
+
+        /// <summary>
+        /// Constructor using <see cref="NORMALIZE_KANJI_DEFAULT"/> and <see cref="NORMALIZE_KANA_DEFAULT"/>, i.e. both kanji and kana iteration marks are normalized.
+        /// </summary>
+        /// <param name="input">Char stream to read from.</param>
+        public JapaneseIterationMarkCharFilter(TextReader input)
+            : this(input, NORMALIZE_KANJI_DEFAULT, NORMALIZE_KANA_DEFAULT)
+        {
+        }
+
+        /// <summary>
+        /// Constructor that lets the caller choose which kinds of iteration marks are normalized.
+        /// </summary>
+        /// <param name="input">Char stream.</param>
+        /// <param name="normalizeKanji">Indicates whether kanji iteration marks should be normalized.</param>
+        /// <param name="normalizeKana">Indicates whether kana iteration marks should be normalized.</param>
+        public JapaneseIterationMarkCharFilter(TextReader input, bool normalizeKanji, bool normalizeKana)
+            : base(input)
+        {
+            this.normalizeKanji = normalizeKanji;
+            this.normalizeKana = normalizeKana;
+            buffer.Reset(input); // presumably binds the rolling buffer to this reader - confirm against RollingCharBuffer
+        }
+
+        /// <summary>
+        /// Reads up to <paramref name="length"/> characters, one at a time via <see cref="Read()"/>, and writes them to <paramref name="buffer"/> beginning at <paramref name="offset"/>.
+        /// </summary>
+        /// <param name="buffer">
+        /// When this method returns, contains the specified character array with the values between offset and (offset + length - 1)
+        /// replaced by the characters read from the current source.</param>
+        /// <param name="offset">
+        /// The position in buffer at which to begin writing.
+        /// </param>
+        /// <param name="length">
+        /// The maximum number of characters to read. If the end of the reader is reached before the specified number of characters is
+        /// read into the buffer, the method returns.
+        /// </param>
+        /// <returns>
+        /// The number of characters that have been read, which may be less than <paramref name="length"/> if the input ends first.
+        /// Returns -1 (not 0) when no character was read, i.e. at end of input or when <paramref name="length"/> is 0.
+        /// </returns>
+        public override int Read(char[] buffer, int offset, int length)
+        {
+            int read = 0;
+
+            for (int i = offset; i < offset + length; i++)
+            {
+                int c = Read();
+                if (c == -1)
+                {
+                    break;
+                }
+                buffer[i] = (char)c; // 'buffer' is the parameter here; it hides the RollingCharBuffer field of the same name
+                read++;
+            }
+
+            return read == 0 ? -1 : read;
+        }
+
+        /// <summary>
+        /// Reads the next character from the rolling buffer, replaces it with its expansion if it is an iteration mark, and advances the buffer position by one.
+        /// </summary>
+        /// <returns>The next (possibly normalized) character, or -1 if no more characters are available.</returns>
+        public override int Read()
+        {
+            int ic = buffer.Get(bufferPosition);
+
+            // End of input: the position is not advanced, so repeated calls keep returning -1
+            if (ic == -1)
+            {
+                buffer.FreeBefore(bufferPosition);
+                return ic;
+            }
+
+            char c = (char)ic;
+
+            // Surrogates pass through unchanged; moving the span end just past them makes a directly following iteration mark illegal (emitted as-is)
+            if (char.IsHighSurrogate(c) || char.IsLowSurrogate(c))
+            {
+                iterationMarkSpanEndPosition = bufferPosition + 1;
+            }
+
+            // Free rolling buffer on full stop; a directly following iteration mark can not refer back across it
+            if (c == FULL_STOP_PUNCTUATION)
+            {
+                buffer.FreeBefore(bufferPosition);
+                iterationMarkSpanEndPosition = bufferPosition + 1;
+            }
+
+            // Normalize iteration mark (which marks count depends on the normalizeKanji / normalizeKana flags)
+            if (IsIterationMark(c))
+            {
+                c = NormalizeIterationMark(c);
+            }
+
+            bufferPosition++;
+            return c;
+        }
+
+        /// <summary>
+        /// Normalizes the iteration mark character <paramref name="c"/> found at the current buffer position, updating the span bookkeeping fields.
+        /// </summary>
+        /// <param name="c">Iteration mark character to normalize.</param>
+        /// <returns>The character the mark expands to, or the mark itself if it is illegal at this position.</returns>
+        /// <exception cref="IOException">If there is a low-level I/O error.</exception>
+        private char NormalizeIterationMark(char c)
+        {
+
+            // Case 1: Inside an iteration mark span - reuse the span size computed when the span started
+            if (bufferPosition < iterationMarkSpanEndPosition)
+            {
+                return Normalize(SourceCharacter(bufferPosition, iterationMarksSpanSize), c);
+            }
+
+            // Case 2: New iteration mark span starts where the previous one ended, which is illegal
+            if (bufferPosition == iterationMarkSpanEndPosition)
+            {
+                // Emit the illegal iteration mark and increase end position to indicate that we can't
+                // start a new span on the next position either
+                iterationMarkSpanEndPosition++;
+                return c;
+            }
+
+            // Case 3: New iteration mark span - measure it once and remember where it ends
+            iterationMarksSpanSize = NextIterationMarkSpanSize();
+            iterationMarkSpanEndPosition = bufferPosition + iterationMarksSpanSize;
+            return Normalize(SourceCharacter(bufferPosition, iterationMarksSpanSize), c);
+        }
+
+        /// <summary>
+        /// Counts the consecutive iteration marks starting at the current buffer position, capped so that the span does not reach back past the previous span end.
+        /// </summary>
+        /// <returns>Number of iteration marks starting at the current buffer position, after capping.</returns>
+        /// <exception cref="IOException">If there is a low-level I/O error.</exception>
+        private int NextIterationMarkSpanSize()
+        {
+            int spanSize = 0;
+            for (int i = bufferPosition; buffer.Get(i) != -1 && IsIterationMark((char)(buffer.Get(i))); i++)
+            {
+                spanSize++;
+            }
+            // Restrict span size so that the source characters (spanSize positions back) don't go past the previous end position
+            if (bufferPosition - spanSize < iterationMarkSpanEndPosition)
+            {
+                spanSize = bufferPosition - iterationMarkSpanEndPosition;
+            }
+            return spanSize;
+        }
+
+        /// <summary>
+        /// Returns the source character for a given position and iteration mark span size, i.e. the buffered character <paramref name="spanSize"/> positions before <paramref name="position"/>.
+        /// </summary>
+        /// <param name="position">Buffer position (should not exceed bufferPosition).</param>
+        /// <param name="spanSize">Iteration mark span size.</param>
+        /// <returns>Source character.</returns>
+        /// <exception cref="IOException">If there is a low-level I/O error.</exception>
+        private char SourceCharacter(int position, int spanSize)
+        {
+            return (char)buffer.Get(position - spanSize);
+        }
+
+        /// <summary>
+        /// Normalize a character by dispatching on the kind of iteration mark that refers to it.
+        /// </summary>
+        /// <param name="c">Source character that the mark repeats.</param>
+        /// <param name="m">Repetition mark referring to <paramref name="c"/>.</param>
+        /// <returns>Normalized character - return <paramref name="c"/> unchanged when <paramref name="m"/> is not a kana iteration mark.</returns>
+        private char Normalize(char c, char m)
+        {
+            if (IsHiraganaIterationMark(m))
+            {
+                return NormalizedHiragana(c, m);
+            }
+
+            if (IsKatakanaIterationMark(m))
+            {
+                return NormalizedKatakana(c, m);
+            }
+
+            return c; // If m is not kana and we are to normalize it, we assume it is kanji and simply repeat the source character
+        }
+
+        /// <summary>
+        /// Normalize hiragana character: the plain mark yields the code point before a dakuten character, the voiced mark yields the dakuten lookup result.
+        /// </summary>
+        /// <param name="c">Hiragana character.</param>
+        /// <param name="m">Repetition mark referring to <paramref name="c"/>.</param>
+        /// <returns>Normalized character - return <paramref name="c"/> on illegal iteration marks.</returns>
+        private char NormalizedHiragana(char c, char m)
+        {
+            switch (m)
+            {
+                case HIRAGANA_ITERATION_MARK:
+                    return IsHiraganaDakuten(c) ? (char)(c - 1) : c;
+                case HIRAGANA_VOICED_ITERATION_MARK:
+                    return LookupHiraganaDakuten(c);
+                default:
+                    return c;
+            }
+        }
+
+        /// <summary>
+        /// Normalize katakana character: the plain mark yields the code point before a dakuten character, the voiced mark yields the dakuten lookup result.
+        /// </summary>
+        /// <param name="c">Katakana character.</param>
+        /// <param name="m">Repetition mark referring to <paramref name="c"/>.</param>
+        /// <returns>Normalized character - return <paramref name="c"/> on illegal iteration marks.</returns>
+        private char NormalizedKatakana(char c, char m)
+        {
+            switch (m)
+            {
+                case KATAKANA_ITERATION_MARK:
+                    return IsKatakanaDakuten(c) ? (char)(c - 1) : c;
+                case KATAKANA_VOICED_ITERATION_MARK:
+                    return LookupKatakanaDakuten(c);
+                default:
+                    return c;
+            }
+        }
+
+        /// <summary>
+        /// Iteration mark character predicate; only mark kinds enabled via the normalizeKanji / normalizeKana flags are recognized.
+        /// </summary>
+        /// <param name="c">Character to test.</param>
+        /// <returns><c>true</c> if <paramref name="c"/> is an iteration mark character that this filter normalizes.  Otherwise <c>false</c>.</returns>
+        private bool IsIterationMark(char c)
+        {
+            return IsKanjiIterationMark(c) || IsHiraganaIterationMark(c) || IsKatakanaIterationMark(c);
+        }
+
+        /// <summary>
+        /// Hiragana iteration mark character predicate (plain or voiced); always <c>false</c> when kana normalization is disabled.
+        /// </summary>
+        /// <param name="c">Character to test.</param>
+        /// <returns><c>true</c> if <paramref name="c"/> is a hiragana iteration mark character and kana normalization is enabled.  Otherwise <c>false</c>.</returns>
+        private bool IsHiraganaIterationMark(char c)
+        {
+            if (normalizeKana)
+            {
+                return c == HIRAGANA_ITERATION_MARK || c == HIRAGANA_VOICED_ITERATION_MARK;
+            }
+            else
+            {
+                return false;
+            }
+        }
+
+        /// <summary>
+        /// Katakana iteration mark character predicate (plain or voiced); always <c>false</c> when kana normalization is disabled.
+        /// </summary>
+        /// <param name="c">Character to test.</param>
+        /// <returns><c>true</c> if <paramref name="c"/> is a katakana iteration mark character and kana normalization is enabled.  Otherwise <c>false</c>.</returns>
+        private bool IsKatakanaIterationMark(char c)
+        {
+            if (normalizeKana)
+            {
+                return c == KATAKANA_ITERATION_MARK || c == KATAKANA_VOICED_ITERATION_MARK;
+            }
+            else
+            {
+                return false;
+            }
+        }
+
+        /// <summary>
+        /// Kanji iteration mark character predicate; always <c>false</c> when kanji normalization is disabled.
+        /// </summary>
+        /// <param name="c">Character to test.</param>
+        /// <returns><c>true</c> if <paramref name="c"/> is a kanji iteration mark character and kanji normalization is enabled.  Otherwise <c>false</c>.</returns>
+        private bool IsKanjiIterationMark(char c)
+        {
+            if (normalizeKanji)
+            {
+                return c == KANJI_ITERATION_MARK;
+            }
+            else
+            {
+                return false;
+            }
+        }
+
+        /// <summary>
+        /// Look up hiragana dakuten in the hiragana map, whose first entry corresponds to か (U+304B).
+        /// </summary>
+        /// <param name="c">Character to look up.</param>
+        /// <returns>Hiragana dakuten variant of <paramref name="c"/> or <paramref name="c"/> itself if no dakuten variant exists.</returns>
+        private char LookupHiraganaDakuten(char c)
+        {
+            return Lookup(c, h2d, '\u304b'); // Code point is for か
+        }
+
+        /// <summary>
+        /// Look up katakana dakuten in the katakana map, whose first entry corresponds to カ (U+30AB). Only full-width katakana are supported.
+        /// </summary>
+        /// <param name="c">Character to look up.</param>
+        /// <returns>Katakana dakuten variant of <paramref name="c"/> or <paramref name="c"/> itself if no dakuten variant exists.</returns>
+        private char LookupKatakanaDakuten(char c)
+        {
+            return Lookup(c, k2d, '\u30ab'); // Code point is for カ
+        }
+
+        /// <summary>
+        /// Hiragana dakuten predicate: <paramref name="c"/> is voiced only if the preceding code point maps to it, so kana that merely map to themselves (e.g. な, っ, ぱ) do not match.
+        /// </summary>
+        /// <param name="c">Character to check.</param>
+        /// <returns><c>true</c> if <paramref name="c"/> is a hiragana dakuten and otherwise <c>false</c>.</returns>
+        private bool IsHiraganaDakuten(char c)
+        {
+            return Inside(c, h2d, '\u304b') && c == LookupHiraganaDakuten((char)(c - 1)); // comparing against Lookup(c) would also accept self-mapped kana
+        }
+
+        /// <summary>
+        /// Katakana dakuten predicate: <paramref name="c"/> is voiced only if the preceding code point maps to it, so kana that merely map to themselves (e.g. ナ, ッ, パ) do not match.
+        /// </summary>
+        /// <param name="c">Character to check.</param>
+        /// <returns><c>true</c> if <paramref name="c"/> is a katakana dakuten and otherwise <c>false</c>.</returns>
+        private bool IsKatakanaDakuten(char c)
+        {
+            return Inside(c, k2d, '\u30ab') && c == LookupKatakanaDakuten((char)(c - 1)); // comparing against Lookup(c) would also accept self-mapped kana
+        }
+
+        /// <summary>
+        /// Looks up a character in dakuten map and returns the entry stored for it.
+        /// Otherwise (character outside the map's range) return the character being looked up itself.
+        /// </summary>
+        /// <param name="c">Character to look up.</param>
+        /// <param name="map">Dakuten map.</param>
+        /// <param name="offset">Code point of the map's first entry; it is subtracted from <paramref name="c"/> to get the index.</param>
+        /// <returns>Mapped character or <paramref name="c"/> if no mapping exists.</returns>
+        private char Lookup(char c, char[] map, char offset)
+        {
+            if (!Inside(c, map, offset))
+            {
+                return c;
+            }
+            else
+            {
+                return map[c - offset];
+            }
+        }
+
+        /// <summary>
+        /// Predicate indicating if the lookup character is within dakuten map range, i.e. in [offset, offset + map.Length).
+        /// </summary>
+        /// <param name="c">Character to look up.</param>
+        /// <param name="map">Dakuten map.</param>
+        /// <param name="offset">Code point of the map's first entry.</param>
+        /// <returns><c>true</c> if <paramref name="c"/> is mapped by map and otherwise <c>false</c>.</returns>
+        private bool Inside(char c, char[] map, char offset)
+        {
+            return c >= offset && c < offset + map.Length;
+        }
+
+        protected override int Correct(int currentOff)
+        {
+            return currentOff; // offsets need no correction: this filter doesn't change the length of strings (each mark becomes exactly one character)
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/JapaneseIterationMarkCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseIterationMarkCharFilterFactory.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseIterationMarkCharFilterFactory.cs
new file mode 100644
index 0000000..c9518c9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseIterationMarkCharFilterFactory.cs
@@ -0,0 +1,66 @@
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="JapaneseIterationMarkCharFilter"/>.
+    /// <code>
+    /// &lt;fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;charFilter class="solr.JapaneseIterationMarkCharFilterFactory" normalizeKanji="true" normalizeKana="true"/&gt;
+    ///     &lt;tokenizer class="solr.JapaneseTokenizerFactory"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    public class JapaneseIterationMarkCharFilterFactory : CharFilterFactory, IMultiTermAwareComponent
+    {
+        private const string NORMALIZE_KANJI_PARAM = "normalizeKanji";
+        private const string NORMALIZE_KANA_PARAM = "normalizeKana";
+
+        private readonly bool normalizeKanji;
+        private readonly bool normalizeKana;
+
+        /// <summary>Creates a new <see cref="JapaneseIterationMarkCharFilterFactory"/></summary>
+        /// <param name="args">
+        /// Factory arguments. <c>normalizeKanji</c> and <c>normalizeKana</c> are read here, falling back to
+        /// the defaults declared on <see cref="JapaneseIterationMarkCharFilter"/>.
+        /// </param>
+        /// <exception cref="ArgumentException">
+        /// If <paramref name="args"/> still contains entries after the known parameters have been read.
+        /// </exception>
+        public JapaneseIterationMarkCharFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            normalizeKanji = GetBoolean(args, NORMALIZE_KANJI_PARAM, JapaneseIterationMarkCharFilter.NORMALIZE_KANJI_DEFAULT);
+            normalizeKana = GetBoolean(args, NORMALIZE_KANA_PARAM, JapaneseIterationMarkCharFilter.NORMALIZE_KANA_DEFAULT);
+            if (args.Count > 0)
+            {
+                // Concatenating the dictionary itself would only print its type name in .NET,
+                // so name the leftover keys explicitly.
+                throw new ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+            }
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="input"/> in a <see cref="JapaneseIterationMarkCharFilter"/>
+        /// configured with this factory's normalization settings.
+        /// </summary>
+        public override TextReader Create(TextReader input)
+        {
+            return new JapaneseIterationMarkCharFilter(input, normalizeKanji, normalizeKana);
+        }
+
+        /// <summary>
+        /// Returns this factory itself as the multi-term component.
+        /// </summary>
+        public virtual AbstractAnalysisFactory GetMultiTermComponent()
+        {
+            return this;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/JapaneseKatakanaStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseKatakanaStemFilter.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseKatakanaStemFilter.cs
new file mode 100644
index 0000000..857e5bf
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseKatakanaStemFilter.cs
@@ -0,0 +1,111 @@
+using Lucene.Net.Analysis.TokenAttributes;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A <see cref="TokenFilter"/> that normalizes common katakana spelling variations
+    /// ending in a long sound character by removing this character (U+30FC).  Only
+    /// katakana words of at least a minimum length are stemmed (default is four).
+    /// </summary>
+    /// <remarks>
+    /// Note that only full-width katakana characters are supported.  Please use a
+    /// <see cref="Cjk.CJKWidthFilter"/> to convert half-width
+    /// katakana to full-width before using this filter.
+    /// <para/>
+    /// In order to prevent terms from being stemmed, use an instance of
+    /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/>
+    /// or a custom <see cref="TokenFilter"/> that sets the <see cref="IKeywordAttribute"/>
+    /// before this <see cref="TokenStream"/>.
+    /// </remarks>
+    public sealed class JapaneseKatakanaStemFilter : TokenFilter
+    {
+        public static readonly int DEFAULT_MINIMUM_LENGTH = 4;
+        private const char HIRAGANA_KATAKANA_PROLONGED_SOUND_MARK = '\u30fc';
+
+        // Bounds of the Unicode "Katakana" block, i.e. the range matched by \p{IsKatakana}.
+        private const char KATAKANA_BLOCK_START = '\u30a0';
+        private const char KATAKANA_BLOCK_END = '\u30ff';
+
+        private readonly ICharTermAttribute termAttr;
+        private readonly IKeywordAttribute keywordAttr;
+        private readonly int minimumKatakanaLength;
+
+        /// <summary>
+        /// Creates a filter that stems katakana terms of at least <paramref name="minimumLength"/> characters.
+        /// </summary>
+        /// <param name="input">The <see cref="TokenStream"/> to consume.</param>
+        /// <param name="minimumLength">Terms shorter than this are left untouched.</param>
+        public JapaneseKatakanaStemFilter(TokenStream input, int minimumLength)
+            : base(input)
+        {
+            this.minimumKatakanaLength = minimumLength;
+            this.termAttr = AddAttribute<ICharTermAttribute>();
+            this.keywordAttr = AddAttribute<IKeywordAttribute>();
+        }
+
+        /// <summary>
+        /// Creates a filter that uses <see cref="DEFAULT_MINIMUM_LENGTH"/> as the minimum length.
+        /// </summary>
+        /// <param name="input">The <see cref="TokenStream"/> to consume.</param>
+        public JapaneseKatakanaStemFilter(TokenStream input)
+            : this(input, DEFAULT_MINIMUM_LENGTH)
+        {
+        }
+
+        /// <summary>
+        /// Advances the wrapped stream and, unless the token is flagged as a keyword,
+        /// shortens the term to its stemmed length.
+        /// </summary>
+        /// <returns><c>false</c> when the wrapped stream has no more tokens; otherwise <c>true</c>.</returns>
+        public override bool IncrementToken()
+        {
+            if (!m_input.IncrementToken())
+            {
+                return false;
+            }
+
+            if (!keywordAttr.IsKeyword)
+            {
+                termAttr.SetLength(Stem(termAttr.Buffer, termAttr.Length));
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Computes the length the term should have after stemming.
+        /// </summary>
+        /// <param name="term">Term buffer.</param>
+        /// <param name="length">Number of valid characters in <paramref name="term"/>.</param>
+        /// <returns>
+        /// <c>length - 1</c> if the term is long enough, consists only of full-width katakana and
+        /// ends with the prolonged sound mark; otherwise <paramref name="length"/>.
+        /// </returns>
+        private int Stem(char[] term, int length)
+        {
+            // The length > 0 test keeps the index below valid even if the minimum length is zero or negative.
+            // The cheap last-character test runs before the full katakana scan.
+            if (length > 0
+                && length >= minimumKatakanaLength
+                && term[length - 1] == HIRAGANA_KATAKANA_PROLONGED_SOUND_MARK
+                && IsKatakana(term, length))
+            {
+                return length - 1;
+            }
+
+            return length;
+        }
+
+        /// <summary>
+        /// Checks whether the first <paramref name="length"/> characters of <paramref name="term"/>
+        /// all belong to the Unicode Katakana block.
+        /// </summary>
+        private static bool IsKatakana(char[] term, int length)
+        {
+            for (int i = 0; i < length; i++)
+            {
+                // NOTE: Test only identifies full-width characters -- half-widths are not supported
+                char c = term[i];
+                if (c < KATAKANA_BLOCK_START || c > KATAKANA_BLOCK_END)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/JapaneseKatakanaStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseKatakanaStemFilterFactory.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseKatakanaStemFilterFactory.cs
new file mode 100644
index 0000000..af2acb5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseKatakanaStemFilterFactory.cs
@@ -0,0 +1,61 @@
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="JapaneseKatakanaStemFilter"/>.
+    /// <code>
+    /// &lt;fieldType name="text_ja" class="solr.TextField"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.JapaneseTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.JapaneseKatakanaStemFilterFactory"
+    ///             minimumLength="4"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    public class JapaneseKatakanaStemFilterFactory : TokenFilterFactory
+    {
+        private const string MINIMUM_LENGTH_PARAM = "minimumLength";
+        private readonly int minimumLength;
+
+        /// <summary>Creates a new <see cref="JapaneseKatakanaStemFilterFactory"/></summary>
+        /// <param name="args">
+        /// Factory arguments. <c>minimumLength</c> is read here and defaults to
+        /// <see cref="JapaneseKatakanaStemFilter.DEFAULT_MINIMUM_LENGTH"/>.
+        /// </param>
+        /// <exception cref="ArgumentException">
+        /// If <c>minimumLength</c> is less than 2, or if <paramref name="args"/> still contains
+        /// entries after the known parameters have been read.
+        /// </exception>
+        public JapaneseKatakanaStemFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            minimumLength = GetInt32(args, MINIMUM_LENGTH_PARAM, JapaneseKatakanaStemFilter.DEFAULT_MINIMUM_LENGTH);
+            if (minimumLength < 2)
+            {
+                throw new ArgumentException("Illegal " + MINIMUM_LENGTH_PARAM + " " + minimumLength + " (must be 2 or greater)");
+            }
+            if (args.Count > 0)
+            {
+                // Concatenating the dictionary itself would only print its type name in .NET,
+                // so name the leftover keys explicitly.
+                throw new ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+            }
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="input"/> in a <see cref="JapaneseKatakanaStemFilter"/>
+        /// using the configured minimum length.
+        /// </summary>
+        public override TokenStream Create(TokenStream input)
+        {
+            return new JapaneseKatakanaStemFilter(input, minimumLength);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/JapanesePartOfSpeechStopFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapanesePartOfSpeechStopFilter.cs b/src/Lucene.Net.Analysis.Kuromoji/JapanesePartOfSpeechStopFilter.cs
new file mode 100644
index 0000000..2b1ccc4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/JapanesePartOfSpeechStopFilter.cs
@@ -0,0 +1,61 @@
+using Lucene.Net.Analysis.Ja.TokenAttributes;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Drops every token whose part-of-speech tag is contained in a given set of stop tags.
+    /// </summary>
+    public sealed class JapanesePartOfSpeechStopFilter : FilteringTokenFilter
+    {
+        private readonly ISet<string> stopTags;
+        private readonly IPartOfSpeechAttribute posAtt;
+
+        [Obsolete("EnablePositionIncrements=false is not supported anymore as of Lucene 4.4.")]
+        public JapanesePartOfSpeechStopFilter(LuceneVersion version, bool enablePositionIncrements, TokenStream input, ISet<string> stopTags)
+            : base(version, enablePositionIncrements, input)
+        {
+            this.posAtt = AddAttribute<IPartOfSpeechAttribute>();
+            this.stopTags = stopTags;
+        }
+
+        /// <summary>
+        /// Initializes a new <see cref="JapanesePartOfSpeechStopFilter"/>.
+        /// </summary>
+        /// <param name="version">Lucene match version.</param>
+        /// <param name="input">The <see cref="TokenStream"/> whose tokens are filtered.</param>
+        /// <param name="stopTags">Part-of-speech tags whose tokens are removed.</param>
+        public JapanesePartOfSpeechStopFilter(LuceneVersion version, TokenStream input, ISet<string> stopTags)
+            : base(version, input)
+        {
+            this.posAtt = AddAttribute<IPartOfSpeechAttribute>();
+            this.stopTags = stopTags;
+        }
+
+        /// <summary>
+        /// Decides whether the current token is kept.
+        /// </summary>
+        /// <returns>
+        /// <c>true</c> if the token has no part-of-speech tag or its tag is not a stop tag; otherwise <c>false</c>.
+        /// </returns>
+        protected override bool Accept()
+        {
+            string tag = posAtt.GetPartOfSpeech();
+            if (tag == null)
+            {
+                // Tokens without a tag are always kept.
+                return true;
+            }
+            return !stopTags.Contains(tag);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/JapanesePartOfSpeechStopFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapanesePartOfSpeechStopFilterFactory.cs b/src/Lucene.Net.Analysis.Kuromoji/JapanesePartOfSpeechStopFilterFactory.cs
new file mode 100644
index 0000000..04fc900
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/JapanesePartOfSpeechStopFilterFactory.cs
@@ -0,0 +1,85 @@
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="JapanesePartOfSpeechStopFilter"/>.
+    /// <code>
+    /// &lt;fieldType name="text_ja" class="solr.TextField"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.JapaneseTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.JapanesePartOfSpeechStopFilterFactory"
+    ///             tags="stopTags.txt" 
+    ///             enablePositionIncrements="true"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    public class JapanesePartOfSpeechStopFilterFactory : TokenFilterFactory, IResourceLoaderAware
+    {
+        private readonly string stopTagFiles;
+        private readonly bool enablePositionIncrements;
+        private ISet<string> stopTags;
+
+        /// <summary>Creates a new <see cref="JapanesePartOfSpeechStopFilterFactory"/></summary>
+        /// <param name="args">
+        /// Factory arguments. <c>tags</c> and <c>enablePositionIncrements</c> (default <c>true</c>) are read here.
+        /// </param>
+        /// <exception cref="ArgumentException">
+        /// If <paramref name="args"/> still contains entries after the known parameters have been read.
+        /// </exception>
+        public JapanesePartOfSpeechStopFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            stopTagFiles = Get(args, "tags");
+            enablePositionIncrements = GetBoolean(args, "enablePositionIncrements", true);
+            if (args.Count > 0)
+            {
+                // Concatenating the dictionary itself would only print its type name in .NET,
+                // so name the leftover keys explicitly.
+                throw new ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+            }
+        }
+
+        /// <summary>
+        /// Loads the stop tags named by the <c>tags</c> argument through <paramref name="loader"/>.
+        /// The stop tag set is left <c>null</c> when no word set is returned.
+        /// </summary>
+        /// <param name="loader">Resource loader used to resolve the stop tag file(s).</param>
+        public virtual void Inform(IResourceLoader loader)
+        {
+            stopTags = null;
+            CharArraySet cas = GetWordSet(loader, stopTagFiles, false);
+            if (cas != null)
+            {
+                // Copy into a plain string set, which is what the filter's constructor accepts.
+                stopTags = new HashSet<string>();
+                foreach (string element in cas)
+                {
+                    stopTags.Add(element);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="stream"/> in a <see cref="JapanesePartOfSpeechStopFilter"/>,
+        /// or returns it unchanged when no stop tags were loaded.
+        /// </summary>
+        public override TokenStream Create(TokenStream stream)
+        {
+            // if stoptags is null, it means the file is empty
+            if (stopTags != null)
+            {
+#pragma warning disable 612, 618
+                TokenStream filter = new JapanesePartOfSpeechStopFilter(m_luceneMatchVersion, enablePositionIncrements, stream, stopTags);
+#pragma warning restore 612, 618
+                return filter;
+            }
+            else
+            {
+                return stream;
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/JapaneseReadingFormFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseReadingFormFilter.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseReadingFormFilter.cs
new file mode 100644
index 0000000..b2e1542
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseReadingFormFilter.cs
@@ -0,0 +1,89 @@
+using Lucene.Net.Analysis.Ja.TokenAttributes;
+using Lucene.Net.Analysis.Ja.Util;
+using Lucene.Net.Analysis.TokenAttributes;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A <see cref="TokenFilter"/> that swaps the term text of each token for its reading,
+    /// rendered either as katakana (the default) or as romaji.
+    /// </summary>
+    public sealed class JapaneseReadingFormFilter : TokenFilter
+    {
+        private readonly ICharTermAttribute termAttr;
+        private readonly IReadingAttribute readingAttr;
+
+        // Scratch buffer reused across tokens for the romanized text.
+        private readonly StringBuilder buffer = new StringBuilder();
+        private readonly bool useRomaji;
+
+        /// <summary>
+        /// Creates a filter over <paramref name="input"/>.
+        /// </summary>
+        /// <param name="input">The <see cref="TokenStream"/> to consume.</param>
+        /// <param name="useRomaji"><c>true</c> to emit romaji; <c>false</c> to emit the reading as-is.</param>
+        public JapaneseReadingFormFilter(TokenStream input, bool useRomaji)
+            : base(input)
+        {
+            this.termAttr = AddAttribute<ICharTermAttribute>();
+            this.readingAttr = AddAttribute<IReadingAttribute>();
+            this.useRomaji = useRomaji;
+        }
+
+        /// <summary>
+        /// Creates a filter over <paramref name="input"/> that does not romanize.
+        /// </summary>
+        public JapaneseReadingFormFilter(TokenStream input)
+            : this(input, false)
+        {
+        }
+
+        /// <summary>
+        /// Advances the wrapped stream and rewrites the term text from the token's reading.
+        /// </summary>
+        /// <returns><c>false</c> when the wrapped stream has no more tokens; otherwise <c>true</c>.</returns>
+        public override bool IncrementToken()
+        {
+            if (!m_input.IncrementToken())
+            {
+                return false;
+            }
+
+            string reading = readingAttr.GetReading();
+            if (useRomaji)
+            {
+                // An OOV term has no reading, so romanize the term text itself in that case.
+                string source = reading ?? termAttr.ToString();
+                buffer.Length = 0;
+                ToStringUtil.GetRomanization(buffer, source);
+                termAttr.SetEmpty().Append(buffer);
+            }
+            else if (reading != null)
+            {
+                // Without a reading the term text is left as it is.
+                termAttr.SetEmpty().Append(reading);
+            }
+
+            return true;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/JapaneseReadingFormFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseReadingFormFilterFactory.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseReadingFormFilterFactory.cs
new file mode 100644
index 0000000..9464c2e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseReadingFormFilterFactory.cs
@@ -0,0 +1,57 @@
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="JapaneseReadingFormFilter"/>.
+    /// <code>
+    /// &lt;fieldType name="text_ja" class="solr.TextField"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.JapaneseTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.JapaneseReadingFormFilterFactory"
+    ///             useRomaji="false"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    public class JapaneseReadingFormFilterFactory : TokenFilterFactory
+    {
+        private const string ROMAJI_PARAM = "useRomaji";
+        private readonly bool useRomaji;
+
+        /// <summary>Creates a new <see cref="JapaneseReadingFormFilterFactory"/>.</summary>
+        /// <param name="args">Factory arguments. <c>useRomaji</c> is read here and defaults to <c>false</c>.</param>
+        /// <exception cref="ArgumentException">
+        /// If <paramref name="args"/> still contains entries after the known parameters have been read.
+        /// </exception>
+        public JapaneseReadingFormFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            useRomaji = GetBoolean(args, ROMAJI_PARAM, false);
+            if (args.Count > 0)
+            {
+                // Concatenating the dictionary itself would only print its type name in .NET,
+                // so name the leftover keys explicitly.
+                throw new ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+            }
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="input"/> in a <see cref="JapaneseReadingFormFilter"/>
+        /// using the configured romaji setting.
+        /// </summary>
+        public override TokenStream Create(TokenStream input)
+        {
+            return new JapaneseReadingFormFilter(input, useRomaji);
+        }
+    }
+}

[07/13] lucenenet git commit: Ported Lucene.Net.Analysis.Kuromoji + tests

Posted by ni...@apache.org.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Util/ToStringUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Util/ToStringUtil.cs b/src/Lucene.Net.Analysis.Kuromoji/Util/ToStringUtil.cs
new file mode 100644
index 0000000..95e2703
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Util/ToStringUtil.cs
@@ -0,0 +1,1401 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Utility class for english translations of morphological data,
+    /// used only for debugging.
+    /// </summary>
+    public static class ToStringUtil
+    {
+        // Maps Japanese part-of-speech tags to English labels; only used for reflectWith.
+        // Keys are compared ordinally.
+        private static readonly IDictionary<string, string> posTranslations = new Dictionary<string, string>(StringComparer.Ordinal)
+        {
+            { "名詞", "noun"},
+            { "名詞-一般", "noun-common" },
+            { "名詞-固有名詞", "noun-proper" },
+            { "名詞-固有名詞-一般", "noun-proper-misc" },
+            { "名詞-固有名詞-人名", "noun-proper-person" },
+            { "名詞-固有名詞-人名-一般", "noun-proper-person-misc" },
+            { "名詞-固有名詞-人名-姓", "noun-proper-person-surname" },
+            { "名詞-固有名詞-人名-名", "noun-proper-person-given_name" },
+            { "名詞-固有名詞-組織", "noun-proper-organization" },
+            { "名詞-固有名詞-地域", "noun-proper-place" },
+            { "名詞-固有名詞-地域-一般", "noun-proper-place-misc" },
+            { "名詞-固有名詞-地域-国", "noun-proper-place-country" },
+            { "名詞-代名詞", "noun-pronoun" },
+            { "名詞-代名詞-一般", "noun-pronoun-misc" },
+            { "名詞-代名詞-縮約", "noun-pronoun-contraction" },
+            { "名詞-副詞可能", "noun-adverbial" },
+            { "名詞-サ変接続", "noun-verbal" },
+            { "名詞-形容動詞語幹", "noun-adjective-base" },
+            { "名詞-数", "noun-numeric" },
+            { "名詞-非自立", "noun-affix" },
+            { "名詞-非自立-一般", "noun-affix-misc" },
+            { "名詞-非自立-副詞可能", "noun-affix-adverbial" },
+            { "名詞-非自立-助動詞語幹", "noun-affix-aux" },
+            { "名詞-非自立-形容動詞語幹", "noun-affix-adjective-base" },
+            { "名詞-特殊", "noun-special" },
+            { "名詞-特殊-助動詞語幹", "noun-special-aux" },
+            { "名詞-接尾", "noun-suffix" },
+            { "名詞-接尾-一般", "noun-suffix-misc" },
+            { "名詞-接尾-人名", "noun-suffix-person" },
+            { "名詞-接尾-地域", "noun-suffix-place" },
+            { "名詞-接尾-サ変接続", "noun-suffix-verbal" },
+            { "名詞-接尾-助動詞語幹", "noun-suffix-aux" },
+            { "名詞-接尾-形容動詞語幹", "noun-suffix-adjective-base" },
+            { "名詞-接尾-副詞可能", "noun-suffix-adverbial" },
+            { "名詞-接尾-助数詞", "noun-suffix-classifier" },
+            { "名詞-接尾-特殊", "noun-suffix-special" },
+            // NOTE(review): this tag has no 接尾 (suffix) component, yet the label says "suffix" -- confirm intent.
+            { "名詞-接続詞的", "noun-suffix-conjunctive" },
+            { "名詞-動詞非自立的", "noun-verbal_aux" },
+            { "名詞-引用文字列", "noun-quotation" },
+            { "名詞-ナイ形容詞語幹", "noun-nai_adjective" },
+            { "接頭詞", "prefix" },
+            { "接頭詞-名詞接続", "prefix-nominal" },
+            { "接頭詞-動詞接続", "prefix-verbal" },
+            { "接頭詞-形容詞接続", "prefix-adjectival" },
+            { "接頭詞-数接続", "prefix-numerical" },
+            { "動詞", "verb" },
+            { "動詞-自立", "verb-main" },
+            { "動詞-非自立", "verb-auxiliary" },
+            { "動詞-接尾", "verb-suffix" },
+            { "形容詞", "adjective" },
+            { "形容詞-自立", "adjective-main" },
+            { "形容詞-非自立", "adjective-auxiliary" },
+            { "形容詞-接尾", "adjective-suffix" },
+            { "副詞", "adverb" },
+            { "副詞-一般", "adverb-misc" },
+            { "副詞-助詞類接続", "adverb-particle_conjunction" },
+            { "連体詞", "adnominal" },
+            { "接続詞", "conjunction" },
+            { "助詞", "particle" },
+            { "助詞-格助詞", "particle-case" },
+            { "助詞-格助詞-一般", "particle-case-misc" },
+            { "助詞-格助詞-引用", "particle-case-quote" },
+            { "助詞-格助詞-連語", "particle-case-compound" },
+            { "助詞-接続助詞", "particle-conjunctive" },
+            { "助詞-係助詞", "particle-dependency" },
+            { "助詞-副助詞", "particle-adverbial" },
+            { "助詞-間投助詞", "particle-interjective" },
+            { "助詞-並立助詞", "particle-coordinate" },
+            { "助詞-終助詞", "particle-final" },
+            { "助詞-副助詞／並立助詞／終助詞", "particle-adverbial/conjunctive/final" },
+            { "助詞-連体化", "particle-adnominalizer" },
+            // 副詞化 is the adverbializing particle; it previously shared the adnominalizer label above.
+            { "助詞-副詞化", "particle-adverbializer" },
+            { "助詞-特殊", "particle-special" },
+            { "助動詞", "auxiliary-verb" },
+            { "感動詞", "interjection" },
+            { "記号", "symbol" },
+            { "記号-一般", "symbol-misc" },
+            { "記号-句点", "symbol-period" },
+            { "記号-読点", "symbol-comma" },
+            { "記号-空白", "symbol-space" },
+            { "記号-括弧開", "symbol-open_bracket" },
+            { "記号-括弧閉", "symbol-close_bracket" },
+            { "記号-アルファベット", "symbol-alphabetic" },
+            { "その他", "other" },
+            { "その他-間投", "other-interjection" },
+            { "フィラー", "filler" },
+            { "非言語音", "non-verbal" },
+            { "語断片", "fragment" },
+            { "未知語", "unknown" }
+        };
+
+
+        /// <summary>
+        /// Translates a part-of-speech tag into its English label.
+        /// </summary>
+        /// <param name="s">Part-of-speech tag to translate.</param>
+        /// <returns>The English label, or <c>null</c> if the tag has no entry in the translation map.</returns>
+        public static string GetPOSTranslation(string s)
+        {
+            string translation;
+            return posTranslations.TryGetValue(s, out translation) ? translation : null;
+        }
+
+        // Maps Japanese inflection type names to English labels; only used for reflectWith.
+        // Keys are compared ordinally.
+        private static readonly IDictionary<string, string> inflTypeTranslations = new Dictionary<string, string>(StringComparer.Ordinal)
+        {
+            { "*", "*" },
+            { "形容詞・アウオ段", "adj-group-a-o-u" },
+            { "形容詞・イ段", "adj-group-i" },
+            { "形容詞・イイ",  "adj-group-ii" },
+            { "不変化型", "non-inflectional" },
+            // タ reads "ta" and ダ reads "da"; the two labels were previously swapped.
+            { "特殊・タ", "special-ta" },
+            { "特殊・ダ", "special-da" },
+            { "文語・ゴトシ", "classical-gotoshi" },
+            { "特殊・ジャ", "special-ja" },
+            { "特殊・ナイ", "special-nai" },
+            { "五段・ラ行特殊", "5-row-cons-r-special" },
+            { "特殊・ヌ", "special-nu" },
+            { "文語・キ", "classical-ki" },
+            { "特殊・タイ", "special-tai" },
+            { "文語・ベシ", "classical-beshi" },
+            { "特殊・ヤ", "special-ya" },
+            { "文語・マジ", "classical-maji" },
+            { "下二・タ行", "2-row-lower-cons-t" },
+            { "特殊・デス", "special-desu" },
+            { "特殊・マス", "special-masu" },
+            { "五段・ラ行アル", "5-row-aru" },
+            { "文語・ナリ", "classical-nari" },
+            { "文語・リ", "classical-ri" },
+            { "文語・ケリ", "classical-keri" },
+            { "文語・ル", "classical-ru" },
+            { "五段・カ行イ音便", "5-row-cons-k-i-onbin" },
+            { "五段・サ行", "5-row-cons-s" },
+            { "一段", "1-row" },
+            { "五段・ワ行促音便", "5-row-cons-w-cons-onbin" },
+            { "五段・マ行", "5-row-cons-m" },
+            { "五段・タ行", "5-row-cons-t" },
+            { "五段・ラ行", "5-row-cons-r" },
+            { "サ変・−スル", "irregular-suffix-suru" },
+            { "五段・ガ行", "5-row-cons-g" },
+            { "サ変・−ズル", "irregular-suffix-zuru" },
+            { "五段・バ行", "5-row-cons-b" },
+            { "五段・ワ行ウ音便", "5-row-cons-w-u-onbin" },
+            { "下二・ダ行", "2-row-lower-cons-d" },
+            { "五段・カ行促音便ユク", "5-row-cons-k-cons-onbin-yuku" },
+            { "上二・ダ行", "2-row-upper-cons-d" },
+            { "五段・カ行促音便", "5-row-cons-k-cons-onbin" },
+            { "一段・得ル", "1-row-eru" },
+            { "四段・タ行", "4-row-cons-t" },
+            { "五段・ナ行", "5-row-cons-n" },
+            { "下二・ハ行", "2-row-lower-cons-h" },
+            { "四段・ハ行", "4-row-cons-h" },
+            { "四段・バ行", "4-row-cons-b" },
+            { "サ変・スル", "irregular-suru" },
+            { "上二・ハ行", "2-row-upper-cons-h" },
+            { "下二・マ行", "2-row-lower-cons-m" },
+            { "四段・サ行", "4-row-cons-s" },
+            { "下二・ガ行", "2-row-lower-cons-g" },
+            { "カ変・来ル", "kuru-kanji" },
+            { "一段・クレル", "1-row-kureru" },
+            { "下二・得", "2-row-lower-u" },
+            { "カ変・クル", "kuru-kana" },
+            { "ラ変", "irregular-cons-r" },
+            { "下二・カ行", "2-row-lower-cons-k" },
+        };
+
+
+        /// <summary>
+        /// Get the english form of inflection type
+        /// </summary>
+        /// <param name="s">The inflection type label to look up.</param>
+        /// <returns>
+        /// The English translation, or <c>null</c> when <paramref name="s"/> is
+        /// <c>null</c> or has no entry in the translation map.
+        /// </returns>
+        public static string GetInflectionTypeTranslation(string s)
+        {
+            // Dictionary.TryGetValue throws ArgumentNullException for a null key;
+            // report it as "no translation" like any other unknown label.
+            if (s == null)
+            {
+                return null;
+            }
+
+            string result;
+            return inflTypeTranslations.TryGetValue(s, out result) ? result : null;
+        }
+
+        // a translation map for inflection forms, only used for reflectWith
+        // Keys are the Japanese inflection-form labels and values are their English
+        // names. Keys are compared ordinally, so the full-width letters and digits in
+        // some keys (e.g. "命令ｅ", "仮定縮約１") must be matched exactly.
+        private static readonly IDictionary<string, string> inflFormTranslations = new Dictionary<string, string>(StringComparer.Ordinal)
+        {
+            { "*", "*" },
+            { "基本形", "base" },
+            { "文語基本形", "classical-base" },
+            { "未然ヌ接続", "imperfective-nu-connection" },
+            { "未然ウ接続", "imperfective-u-connection" },
+            { "連用タ接続", "conjunctive-ta-connection" },
+            { "連用テ接続", "conjunctive-te-connection" },
+            { "連用ゴザイ接続", "conjunctive-gozai-connection" },
+            { "体言接続", "uninflected-connection" },
+            { "仮定形", "subjunctive" },
+            { "命令ｅ", "imperative-e" },
+            { "仮定縮約１", "conditional-contracted-1" },
+            { "仮定縮約２", "conditional-contracted-2" },
+            { "ガル接続", "garu-connection" },
+            { "未然形", "imperfective" },
+            { "連用形", "conjunctive" },
+            { "音便基本形", "onbin-base" },
+            { "連用デ接続", "conjunctive-de-connection" },
+            { "未然特殊", "imperfective-special" },
+            { "命令ｉ", "imperative-i" },
+            { "連用ニ接続", "conjunctive-ni-connection" },
+            { "命令ｙｏ", "imperative-yo" },
+            { "体言接続特殊", "adnominal-special" },
+            { "命令ｒｏ", "imperative-ro" },
+            { "体言接続特殊２", "uninflected-special-connection-2" },
+            { "未然レル接続", "imperfective-reru-connection" },
+            { "現代基本形", "modern-base" },
+            { "基本形-促音便", "base-onbin" }, // not sure about this
+        };
+
+
+        /// <summary>
+        /// Get the english form of inflected form
+        /// </summary>
+        /// <param name="s">The inflection form label to look up.</param>
+        /// <returns>
+        /// The English translation, or <c>null</c> when <paramref name="s"/> is
+        /// <c>null</c> or has no entry in the translation map.
+        /// </returns>
+        public static string GetInflectedFormTranslation(string s)
+        {
+            // Dictionary.TryGetValue throws ArgumentNullException for a null key;
+            // report it as "no translation" like any other unknown label.
+            if (s == null)
+            {
+                return null;
+            }
+
+            string result;
+            return inflFormTranslations.TryGetValue(s, out result) ? result : null;
+        }
+
+        /// <summary>
+        /// Romanize katakana with modified hepburn
+        /// </summary>
+        /// <param name="s">The katakana text to romanize.</param>
+        /// <returns>The romanized form of <paramref name="s"/>.</returns>
+        public static string GetRomanization(string s)
+        {
+            // The output target is an in-memory StringBuilder, whose Append calls do
+            // not raise IOException, so no I/O error translation is needed here.
+            StringBuilder result = new StringBuilder();
+            GetRomanization(result, s);
+            return result.ToString();
+        }
+
+        /// <summary>
+        /// Romanize katakana with modified hepburn
+        /// </summary>
+        // TODO: now that this is used by readingsfilter and not just for
+        // debugging, fix this to really be a scheme that works best with IMEs
+        public static void GetRomanization(StringBuilder builder, string s)
+        {
+            int len = s.Length;
+            for (int i = 0; i < len; i++)
+            {
+                // maximum lookahead: 3
+                char ch = s[i];
+                char ch2 = (i < len - 1) ? s[i + 1] : (char)0;
+                char ch3 = (i < len - 2) ? s[i + 2] : (char)0;
+
+                //main:
+                switch (ch)
+                {
+
+                    case 'ッ':
+                        switch (ch2)
+                        {
+                            case 'カ':
+                            case 'キ':
+                            case 'ク':
+                            case 'ケ':
+                            case 'コ':
+                                builder.Append('k');
+                                goto break_main;
+                            case 'サ':
+                            case 'シ':
+                            case 'ス':
+                            case 'セ':
+                            case 'ソ':
+                                builder.Append('s');
+                                goto break_main;
+                            case 'タ':
+                            case 'チ':
+                            case 'ツ':
+                            case 'テ':
+                            case 'ト':
+                                builder.Append('t');
+                                goto break_main;
+                            case 'パ':
+                            case 'ピ':
+                            case 'プ':
+                            case 'ペ':
+                            case 'ポ':
+                                builder.Append('p');
+                                goto break_main;
+                        }
+                        break;
+                    case 'ア':
+                        builder.Append('a');
+                        break;
+                    case 'イ':
+                        if (ch2 == 'ィ')
+                        {
+                            builder.Append("yi");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("ye");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append('i');
+                        }
+                        break;
+                    case 'ウ':
+                        switch (ch2)
+                        {
+                            case 'ァ':
+                                builder.Append("wa");
+                                i++;
+                                break;
+                            case 'ィ':
+                                builder.Append("wi");
+                                i++;
+                                break;
+                            case 'ゥ':
+                                builder.Append("wu");
+                                i++;
+                                break;
+                            case 'ェ':
+                                builder.Append("we");
+                                i++;
+                                break;
+                            case 'ォ':
+                                builder.Append("wo");
+                                i++;
+                                break;
+                            case 'ュ':
+                                builder.Append("wyu");
+                                i++;
+                                break;
+                            default:
+                                builder.Append('u');
+                                break;
+                        }
+                        break;
+                    case 'エ':
+                        builder.Append('e');
+                        break;
+                    case 'オ':
+                        if (ch2 == 'ウ')
+                        {
+                            builder.Append('ō');
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append('o');
+                        }
+                        break;
+                    case 'カ':
+                        builder.Append("ka");
+                        break;
+                    case 'キ':
+                        if (ch2 == 'ョ' && ch3 == 'ウ')
+                        {
+                            builder.Append("kyō");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ュ' && ch3 == 'ウ')
+                        {
+                            builder.Append("kyū");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ャ')
+                        {
+                            builder.Append("kya");
+                            i++;
+                        }
+                        else if (ch2 == 'ョ')
+                        {
+                            builder.Append("kyo");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("kyu");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("kye");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("ki");
+                        }
+                        break;
+                    case 'ク':
+                        switch (ch2)
+                        {
+                            case 'ァ':
+                                builder.Append("kwa");
+                                i++;
+                                break;
+                            case 'ィ':
+                                builder.Append("kwi");
+                                i++;
+                                break;
+                            case 'ェ':
+                                builder.Append("kwe");
+                                i++;
+                                break;
+                            case 'ォ':
+                                builder.Append("kwo");
+                                i++;
+                                break;
+                            case 'ヮ':
+                                builder.Append("kwa");
+                                i++;
+                                break;
+                            default:
+                                builder.Append("ku");
+                                break;
+                        }
+                        break;
+                    case 'ケ':
+                        builder.Append("ke");
+                        break;
+                    case 'コ':
+                        if (ch2 == 'ウ')
+                        {
+                            builder.Append("kō");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("ko");
+                        }
+                        break;
+                    case 'サ':
+                        builder.Append("sa");
+                        break;
+                    case 'シ':
+                        if (ch2 == 'ョ' && ch3 == 'ウ')
+                        {
+                            builder.Append("shō");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ュ' && ch3 == 'ウ')
+                        {
+                            builder.Append("shū");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ャ')
+                        {
+                            builder.Append("sha");
+                            i++;
+                        }
+                        else if (ch2 == 'ョ')
+                        {
+                            builder.Append("sho");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("shu");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("she");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("shi");
+                        }
+                        break;
+                    case 'ス':
+                        if (ch2 == 'ィ')
+                        {
+                            builder.Append("si");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("su");
+                        }
+                        break;
+                    case 'セ':
+                        builder.Append("se");
+                        break;
+                    case 'ソ':
+                        if (ch2 == 'ウ')
+                        {
+                            builder.Append("sō");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("so");
+                        }
+                        break;
+                    case 'タ':
+                        builder.Append("ta");
+                        break;
+                    case 'チ':
+                        if (ch2 == 'ョ' && ch3 == 'ウ')
+                        {
+                            builder.Append("chō");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ュ' && ch3 == 'ウ')
+                        {
+                            builder.Append("chū");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ャ')
+                        {
+                            builder.Append("cha");
+                            i++;
+                        }
+                        else if (ch2 == 'ョ')
+                        {
+                            builder.Append("cho");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("chu");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("che");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("chi");
+                        }
+                        break;
+                    case 'ツ':
+                        if (ch2 == 'ァ')
+                        {
+                            builder.Append("tsa");
+                            i++;
+                        }
+                        else if (ch2 == 'ィ')
+                        {
+                            builder.Append("tsi");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("tse");
+                            i++;
+                        }
+                        else if (ch2 == 'ォ')
+                        {
+                            builder.Append("tso");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("tsyu");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("tsu");
+                        }
+                        break;
+                    case 'テ':
+                        if (ch2 == 'ィ')
+                        {
+                            builder.Append("ti");
+                            i++;
+                        }
+                        else if (ch2 == 'ゥ')
+                        {
+                            builder.Append("tu");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("tyu");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("te");
+                        }
+                        break;
+                    case 'ト':
+                        if (ch2 == 'ウ')
+                        {
+                            builder.Append("tō");
+                            i++;
+                        }
+                        else if (ch2 == 'ゥ')
+                        {
+                            builder.Append("tu");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("to");
+                        }
+                        break;
+                    case 'ナ':
+                        builder.Append("na");
+                        break;
+                    case 'ニ':
+                        if (ch2 == 'ョ' && ch3 == 'ウ')
+                        {
+                            builder.Append("nyō");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ュ' && ch3 == 'ウ')
+                        {
+                            builder.Append("nyū");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ャ')
+                        {
+                            builder.Append("nya");
+                            i++;
+                        }
+                        else if (ch2 == 'ョ')
+                        {
+                            builder.Append("nyo");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("nyu");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("nye");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("ni");
+                        }
+                        break;
+                    case 'ヌ':
+                        builder.Append("nu");
+                        break;
+                    case 'ネ':
+                        builder.Append("ne");
+                        break;
+                    case 'ノ':
+                        if (ch2 == 'ウ')
+                        {
+                            builder.Append("nō");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("no");
+                        }
+                        break;
+                    case 'ハ':
+                        builder.Append("ha");
+                        break;
+                    case 'ヒ':
+                        if (ch2 == 'ョ' && ch3 == 'ウ')
+                        {
+                            builder.Append("hyō");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ュ' && ch3 == 'ウ')
+                        {
+                            builder.Append("hyū");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ャ')
+                        {
+                            builder.Append("hya");
+                            i++;
+                        }
+                        else if (ch2 == 'ョ')
+                        {
+                            builder.Append("hyo");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("hyu");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("hye");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("hi");
+                        }
+                        break;
+                    case 'フ':
+                        if (ch2 == 'ャ')
+                        {
+                            builder.Append("fya");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("fyu");
+                            i++;
+                        }
+                        else if (ch2 == 'ィ' && ch3 == 'ェ')
+                        {
+                            builder.Append("fye");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ョ')
+                        {
+                            builder.Append("fyo");
+                            i++;
+                        }
+                        else if (ch2 == 'ァ')
+                        {
+                            builder.Append("fa");
+                            i++;
+                        }
+                        else if (ch2 == 'ィ')
+                        {
+                            builder.Append("fi");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("fe");
+                            i++;
+                        }
+                        else if (ch2 == 'ォ')
+                        {
+                            builder.Append("fo");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("fu");
+                        }
+                        break;
+                    case 'ヘ':
+                        builder.Append("he");
+                        break;
+                    case 'ホ':
+                        if (ch2 == 'ウ')
+                        {
+                            builder.Append("hō");
+                            i++;
+                        }
+                        else if (ch2 == 'ゥ')
+                        {
+                            builder.Append("hu");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("ho");
+                        }
+                        break;
+                    case 'マ':
+                        builder.Append("ma");
+                        break;
+                    case 'ミ':
+                        if (ch2 == 'ョ' && ch3 == 'ウ')
+                        {
+                            builder.Append("myō");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ュ' && ch3 == 'ウ')
+                        {
+                            builder.Append("myū");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ャ')
+                        {
+                            builder.Append("mya");
+                            i++;
+                        }
+                        else if (ch2 == 'ョ')
+                        {
+                            builder.Append("myo");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("myu");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("mye");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("mi");
+                        }
+                        break;
+                    case 'ム':
+                        builder.Append("mu");
+                        break;
+                    case 'メ':
+                        builder.Append("me");
+                        break;
+                    case 'モ':
+                        if (ch2 == 'ウ')
+                        {
+                            builder.Append("mō");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("mo");
+                        }
+                        break;
+                    case 'ヤ':
+                        builder.Append("ya");
+                        break;
+                    case 'ユ':
+                        builder.Append("yu");
+                        break;
+                    case 'ヨ':
+                        if (ch2 == 'ウ')
+                        {
+                            builder.Append("yō");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("yo");
+                        }
+                        break;
+                    case 'ラ':
+                        if (ch2 == '゜')
+                        {
+                            builder.Append("la");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("ra");
+                        }
+                        break;
+                    case 'リ':
+                        if (ch2 == 'ョ' && ch3 == 'ウ')
+                        {
+                            builder.Append("ryō");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ュ' && ch3 == 'ウ')
+                        {
+                            builder.Append("ryū");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ャ')
+                        {
+                            builder.Append("rya");
+                            i++;
+                        }
+                        else if (ch2 == 'ョ')
+                        {
+                            builder.Append("ryo");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("ryu");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("rye");
+                            i++;
+                        }
+                        else if (ch2 == '゜')
+                        {
+                            builder.Append("li");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("ri");
+                        }
+                        break;
+                    case 'ル':
+                        if (ch2 == '゜')
+                        {
+                            builder.Append("lu");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("ru");
+                        }
+                        break;
+                    case 'レ':
+                        if (ch2 == '゜')
+                        {
+                            builder.Append("le");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("re");
+                        }
+                        break;
+                    case 'ロ':
+                        if (ch2 == 'ウ')
+                        {
+                            builder.Append("rō");
+                            i++;
+                        }
+                        else if (ch2 == '゜')
+                        {
+                            builder.Append("lo");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("ro");
+                        }
+                        break;
+                    case 'ワ':
+                        builder.Append("wa");
+                        break;
+                    case 'ヰ':
+                        builder.Append("i");
+                        break;
+                    case 'ヱ':
+                        builder.Append("e");
+                        break;
+                    case 'ヲ':
+                        builder.Append("o");
+                        break;
+                    case 'ン':
+                        switch (ch2)
+                        {
+                            case 'バ':
+                            case 'ビ':
+                            case 'ブ':
+                            case 'ベ':
+                            case 'ボ':
+                            case 'パ':
+                            case 'ピ':
+                            case 'プ':
+                            case 'ペ':
+                            case 'ポ':
+                            case 'マ':
+                            case 'ミ':
+                            case 'ム':
+                            case 'メ':
+                            case 'モ':
+                                builder.Append('m');
+                                goto break_main;
+                            case 'ヤ':
+                            case 'ユ':
+                            case 'ヨ':
+                            case 'ア':
+                            case 'イ':
+                            case 'ウ':
+                            case 'エ':
+                            case 'オ':
+                                builder.Append("n'");
+                                goto break_main;
+                            default:
+                                builder.Append("n");
+                                goto break_main;
+                        }
+                    case 'ガ':
+                        builder.Append("ga");
+                        break;
+                    case 'ギ':
+                        if (ch2 == 'ョ' && ch3 == 'ウ')
+                        {
+                            builder.Append("gyō");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ュ' && ch3 == 'ウ')
+                        {
+                            builder.Append("gyū");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ャ')
+                        {
+                            builder.Append("gya");
+                            i++;
+                        }
+                        else if (ch2 == 'ョ')
+                        {
+                            builder.Append("gyo");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("gyu");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("gye");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("gi");
+                        }
+                        break;
+                    case 'グ':
+                        switch (ch2)
+                        {
+                            case 'ァ':
+                                builder.Append("gwa");
+                                i++;
+                                break;
+                            case 'ィ':
+                                builder.Append("gwi");
+                                i++;
+                                break;
+                            case 'ェ':
+                                builder.Append("gwe");
+                                i++;
+                                break;
+                            case 'ォ':
+                                builder.Append("gwo");
+                                i++;
+                                break;
+                            case 'ヮ':
+                                builder.Append("gwa");
+                                i++;
+                                break;
+                            default:
+                                builder.Append("gu");
+                                break;
+                        }
+                        break;
+                    case 'ゲ':
+                        builder.Append("ge");
+                        break;
+                    case 'ゴ':
+                        if (ch2 == 'ウ')
+                        {
+                            builder.Append("gō");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("go");
+                        }
+                        break;
+                    case 'ザ':
+                        builder.Append("za");
+                        break;
+                    case 'ジ':
+                        if (ch2 == 'ョ' && ch3 == 'ウ')
+                        {
+                            builder.Append("jō");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ュ' && ch3 == 'ウ')
+                        {
+                            builder.Append("jū");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ャ')
+                        {
+                            builder.Append("ja");
+                            i++;
+                        }
+                        else if (ch2 == 'ョ')
+                        {
+                            builder.Append("jo");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("ju");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("je");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("ji");
+                        }
+                        break;
+                    case 'ズ':
+                        if (ch2 == 'ィ')
+                        {
+                            builder.Append("zi");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("zu");
+                        }
+                        break;
+                    case 'ゼ':
+                        builder.Append("ze");
+                        break;
+                    case 'ゾ':
+                        if (ch2 == 'ウ')
+                        {
+                            builder.Append("zō");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("zo");
+                        }
+                        break;
+                    case 'ダ':
+                        builder.Append("da");
+                        break;
+                    case 'ヂ':
+                        // TODO: investigate all this
+                        if (ch2 == 'ョ' && ch3 == 'ウ')
+                        {
+                            builder.Append("jō");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ュ' && ch3 == 'ウ')
+                        {
+                            builder.Append("jū");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ャ')
+                        {
+                            builder.Append("ja");
+                            i++;
+                        }
+                        else if (ch2 == 'ョ')
+                        {
+                            builder.Append("jo");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("ju");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("je");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("ji");
+                        }
+                        break;
+                    case 'ヅ':
+                        builder.Append("zu");
+                        break;
+                    case 'デ':
+                        if (ch2 == 'ィ')
+                        {
+                            builder.Append("di");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("dyu");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("de");
+                        }
+                        break;
+                    case 'ド':
+                        if (ch2 == 'ウ')
+                        {
+                            builder.Append("dō");
+                            i++;
+                        }
+                        else if (ch2 == 'ゥ')
+                        {
+                            builder.Append("du");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("do");
+                        }
+                        break;
+                    case 'バ':
+                        builder.Append("ba");
+                        break;
+                    case 'ビ':
+                        if (ch2 == 'ョ' && ch3 == 'ウ')
+                        {
+                            builder.Append("byō");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ュ' && ch3 == 'ウ')
+                        {
+                            builder.Append("byū");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ャ')
+                        {
+                            builder.Append("bya");
+                            i++;
+                        }
+                        else if (ch2 == 'ョ')
+                        {
+                            builder.Append("byo");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("byu");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("bye");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("bi");
+                        }
+                        break;
+                    case 'ブ':
+                        builder.Append("bu");
+                        break;
+                    case 'ベ':
+                        builder.Append("be");
+                        break;
+                    case 'ボ':
+                        if (ch2 == 'ウ')
+                        {
+                            builder.Append("bō");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("bo");
+                        }
+                        break;
+                    case 'パ':
+                        builder.Append("pa");
+                        break;
+                    case 'ピ':
+                        if (ch2 == 'ョ' && ch3 == 'ウ')
+                        {
+                            builder.Append("pyō");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ュ' && ch3 == 'ウ')
+                        {
+                            builder.Append("pyū");
+                            i += 2;
+                        }
+                        else if (ch2 == 'ャ')
+                        {
+                            builder.Append("pya");
+                            i++;
+                        }
+                        else if (ch2 == 'ョ')
+                        {
+                            builder.Append("pyo");
+                            i++;
+                        }
+                        else if (ch2 == 'ュ')
+                        {
+                            builder.Append("pyu");
+                            i++;
+                        }
+                        else if (ch2 == 'ェ')
+                        {
+                            builder.Append("pye");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("pi");
+                        }
+                        break;
+                    case 'プ':
+                        builder.Append("pu");
+                        break;
+                    case 'ペ':
+                        builder.Append("pe");
+                        break;
+                    case 'ポ':
+                        if (ch2 == 'ウ')
+                        {
+                            builder.Append("pō");
+                            i++;
+                        }
+                        else
+                        {
+                            builder.Append("po");
+                        }
+                        break;
+                    case 'ヷ':
+                        builder.Append("va");
+                        break;
+                    case 'ヸ':
+                        builder.Append("vi");
+                        break;
+                    case 'ヹ':
+                        builder.Append("ve");
+                        break;
+                    case 'ヺ':
+                        builder.Append("vo");
+                        break;
+                    case 'ヴ':
+                        if (ch2 == 'ィ' && ch3 == 'ェ')
+                        {
+                            builder.Append("vye");
+                            i += 2;
+                        }
+                        else
+                        {
+                            builder.Append('v');
+                        }
+                        break;
+                    case 'ァ':
+                        builder.Append('a');
+                        break;
+                    case 'ィ':
+                        builder.Append('i');
+                        break;
+                    case 'ゥ':
+                        builder.Append('u');
+                        break;
+                    case 'ェ':
+                        builder.Append('e');
+                        break;
+                    case 'ォ':
+                        builder.Append('o');
+                        break;
+                    case 'ヮ':
+                        builder.Append("wa");
+                        break;
+                    case 'ャ':
+                        builder.Append("ya");
+                        break;
+                    case 'ュ':
+                        builder.Append("yu");
+                        break;
+                    case 'ョ':
+                        builder.Append("yo");
+                        break;
+                    case 'ー':
+                        break;
+                    default:
+                        builder.Append(ch);
+                        break;
+                }
+                break_main: { }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/project.json b/src/Lucene.Net.Analysis.Kuromoji/project.json
new file mode 100644
index 0000000..937b9bf
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/project.json
@@ -0,0 +1,60 @@
+{
+  "version": "4.8.0",
+  "title": "Lucene.Net.Analysis.Kuromoji",
+  "description": "Japanese Morphological Analyzer for the Lucene.Net full-text search engine library from The Apache Software Foundation.",
+  "authors": [ "The Apache Software Foundation" ],
+  "packOptions": {
+    "projectUrl": "http://lucenenet.apache.org/",
+    "licenseUrl": "https://github.com/apache/lucenenet/blob/master/LICENSE.txt",
+    "iconUrl": "https://github.com/apache/lucenenet/blob/master/branding/logo/lucene-net-icon-128x128.png?raw=true",
+    "owners": [ "The Apache Software Foundation" ],
+    "repository": { "url": "https://github.com/apache/lucenenet" },
+    "tags": [ "lucene.net", "core", "text", "search", "information", "retrieval", "lucene", "apache", "analysis", "index", "query", "japanese" ],
+    "releaseNotes": "This package depends on a temporary version of icu.net hosted on MyGet until official .NET Core support is added. To install, copy the NuGet.config file from https://github.com/apache/lucenenet/blob/master/NuGet.config into your project and then install this package via Package Manager Console as usual."
+  },
+  "buildOptions": {
+    "compile": {
+      "includeFiles": [ "../CommonAssemblyInfo.cs" ]
+    },
+    "embed": {
+      "includeFiles": [
+        "stoptags.txt",
+        "stopwords.txt",
+        "Dict/CharacterDefinition.dat",
+        "Dict/ConnectionCosts.dat",
+        "Dict/TokenInfoDictionary$buffer.dat",
+        "Dict/TokenInfoDictionary$fst.dat",
+        "Dict/TokenInfoDictionary$posDict.dat",
+        "Dict/TokenInfoDictionary$targetMap.dat",
+        "Dict/UnknownDictionary$buffer.dat",
+        "Dict/UnknownDictionary$posDict.dat",
+        "Dict/UnknownDictionary$targetMap.dat"
+      ]
+    },
+    "nowarn": [ "1591", "1573" ]
+  },
+  "dependencies": {
+    "Lucene.Net": "4.8.0",
+    "Lucene.Net.Analysis.Common": "4.8.0"
+  },
+  "frameworks": {
+    "netstandard1.5": {
+      "imports": "dnxcore50",
+      "buildOptions": {
+        "debugType": "portable",
+        "define": [ "NETSTANDARD" ]
+      },
+      "dependencies": {
+        "NETStandard.Library": "1.6.0",
+        "System.Globalization.Extensions": "4.3.0",
+        "System.Text.Encoding.CodePages": "4.4.0-preview1-25305-02"
+      }
+    },
+    "net451": {
+      "buildOptions": {
+        "debugType": "full",
+        "define": [ "FEATURE_SERIALIZABLE" ]
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/stoptags.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/stoptags.txt b/src/Lucene.Net.Analysis.Kuromoji/stoptags.txt
new file mode 100644
index 0000000..71b7508
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/stoptags.txt
@@ -0,0 +1,420 @@
+#
+# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
+#
+# Any token with a part-of-speech tag that exactly matches one defined in this
+# file is removed from the token stream.
+#
+# Set your own stoptags by uncommenting the lines below.  Note that comments are
+# not allowed on the same line as a stoptag.  See LUCENE-3745 for frequency lists,
+# etc. that can be useful for building your own stoptag set.
+#
+# The entire possible tagset is provided below for convenience.
+#
+#####
+#  noun: unclassified nouns
+#名詞
+#
+#  noun-common: Common nouns or nouns where the sub-classification is undefined
+#名詞-一般
+#
+#  noun-proper: Proper nouns where the sub-classification is undefined 
+#名詞-固有名詞
+#
+#  noun-proper-misc: miscellaneous proper nouns
+#名詞-固有名詞-一般
+#
+#  noun-proper-person: Personal names where the sub-classification is undefined
+#名詞-固有名詞-人名
+#
+#  noun-proper-person-misc: names that cannot be divided into surname and 
+#  given name; foreign names; names where the surname or given name is unknown.
+#  e.g. お市の方
+#名詞-固有名詞-人名-一般
+#
+#  noun-proper-person-surname: Mainly Japanese surnames.
+#  e.g. 山田
+#名詞-固有名詞-人名-姓
+#
+#  noun-proper-person-given_name: Mainly Japanese given names.
+#  e.g. 太郎
+#名詞-固有名詞-人名-名
+#
+#  noun-proper-organization: Names representing organizations.
+#  e.g. 通産省, NHK
+#名詞-固有名詞-組織
+#
+#  noun-proper-place: Place names where the sub-classification is undefined
+#名詞-固有名詞-地域
+#
+#  noun-proper-place-misc: Place names excluding countries.
+#  e.g. アジア, バルセロナ, 京都
+#名詞-固有名詞-地域-一般
+#
+#  noun-proper-place-country: Country names. 
+#  e.g. 日本, オーストラリア
+#名詞-固有名詞-地域-国
+#
+#  noun-pronoun: Pronouns where the sub-classification is undefined
+#名詞-代名詞
+#
+#  noun-pronoun-misc: miscellaneous pronouns: 
+#  e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
+#名詞-代名詞-一般
+#
+#  noun-pronoun-contraction: Spoken language contraction made by combining a 
+#  pronoun and the particle 'wa'.
+#  e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ 
+#名詞-代名詞-縮約
+#
+#  noun-adverbial: Temporal nouns such as names of days or months that behave 
+#  like adverbs. Nouns that represent amount or ratios and can be used adverbially,
+#  e.g. 金曜, 一月, 午後, 少量
+#名詞-副詞可能
+#
+#  noun-verbal: Nouns that take arguments with case and can appear followed by 
+#  'suru' and related verbs (する, できる, なさる, くださる)
+#  e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
+#名詞-サ変接続
+#
+#  noun-adjective-base: The base form of adjectives, words that appear before な ("na")
+#  e.g. 健康, 安易, 駄目, だめ
+#名詞-形容動詞語幹
+#
+#  noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
+#  e.g. 0, 1, 2, 何, 数, 幾
+#名詞-数
+#
+#  noun-affix: noun affixes where the sub-classification is undefined
+#名詞-非自立
+#
+#  noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that 
+#  attach to the base form of inflectional words, words that cannot be classified 
+#  into any of the other categories below. This category includes indefinite nouns.
+#  e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, 
+#       順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, 
+#       拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
+#       わり, 割り, 割, ん-口語/, もん-口語/
+#名詞-非自立-一般
+#
+#  noun-affix-adverbial: noun affixes that can behave as adverbs.
+#  e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, 
+#       上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, 
+#       最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, 
+#       とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, 
+#       儘, 侭, みぎり, 矢先
+#名詞-非自立-副詞可能
+#
+#  noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars 
+#  with the stem よう(だ) ("you(da)").
+#  e.g.  よう, やう, 様 (よう)
+#名詞-非自立-助動詞語幹
+#  
+#  noun-affix-adjective-base: noun affixes that can connect to the indeclinable
+#  connection form な (aux "da").
+#  e.g. みたい, ふう
+#名詞-非自立-形容動詞語幹
+#
+#  noun-special: special nouns where the sub-classification is undefined.
+#名詞-特殊
+#
+#  noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is 
+#  treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the base 
+#  form of inflectional words.
+#  e.g. そう
+#名詞-特殊-助動詞語幹
+#
+#  noun-suffix: noun suffixes where the sub-classification is undefined.
+#名詞-接尾
+#
+#  noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect 
+#  to ガル or タイ and can combine into compound nouns, words that cannot be classified into
+#  any of the other categories below. In general, this category is more inclusive than 
+#  接尾語 ("suffix") and is usually the last element in a compound noun.
+#  e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (～した) さ, 次第, 済 (ず) み,
+#       よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
+#名詞-接尾-一般
+#
+#  noun-suffix-person: Suffixes that form nouns and attach to person names more often
+#  than other nouns.
+#  e.g. 君, 様, 著
+#名詞-接尾-人名
+#
+#  noun-suffix-place: Suffixes that form nouns and attach to place names more often 
+#  than other nouns.
+#  e.g. 町, 市, 県
+#名詞-接尾-地域
+#
+#  noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that 
+#  can appear before スル ("suru").
+#  e.g. 化, 視, 分け, 入り, 落ち, 買い
+#名詞-接尾-サ変接続
+#
+#  noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, 
+#  is treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the 
+#  conjunctive form of inflectional words.
+#  e.g. そう
+#名詞-接尾-助動詞語幹
+#
+#  noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive 
+#  form of inflectional words and appear before the copula だ ("da").
+#  e.g. 的, げ, がち
+#名詞-接尾-形容動詞語幹
+#
+#  noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
+#  e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
+#名詞-接尾-副詞可能
+#
+#  noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category 
+#  is more inclusive than 助数詞 ("classifier") and includes common nouns that attach 
+#  to numbers.
+#  e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
+#名詞-接尾-助数詞
+#
+#  noun-suffix-special: Special suffixes that mainly attach to inflecting words.
+#  e.g. (楽し) さ, (考え) 方
+#名詞-接尾-特殊
+#
+#  noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words 
+#  together.
+#  e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
+#名詞-接続詞的
+#
+#  noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are 
+#  semantically verb-like.
+#  e.g. ごらん, ご覧, 御覧, 頂戴
+#名詞-動詞非自立的
+#
+#  noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, 
+#  dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") 
+#  is いわく ("iwaku").
+#名詞-引用文字列
+#
+#  noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
+#  behave like an adjective.
+#  e.g. 申し訳, 仕方, とんでも, 違い
+#名詞-ナイ形容詞語幹
+#
+#####
+#  prefix: unclassified prefixes
+#接頭詞
+#
+#  prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) 
+#  excluding numerical expressions.
+#  e.g. お (水), 某 (氏), 同 (社), 故 (～氏), 高 (品質), お (見事), ご (立派)
+#接頭詞-名詞接続
+#
+#  prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
+#  in conjunctive form followed by なる/なさる/くださる.
+#  e.g. お (読みなさい), お (座り)
+#接頭詞-動詞接続
+#
+#  prefix-adjectival: Prefixes that attach to adjectives.
+#  e.g. お (寒いですねえ), バカ (でかい)
+#接頭詞-形容詞接続
+#
+#  prefix-numerical: Prefixes that attach to numerical expressions.
+#  e.g. 約, およそ, 毎時
+#接頭詞-数接続
+#
+#####
+#  verb: unclassified verbs
+#動詞
+#
+#  verb-main:
+#動詞-自立
+#
+#  verb-auxiliary:
+#動詞-非自立
+#
+#  verb-suffix:
+#動詞-接尾
+#
+#####
+#  adjective: unclassified adjectives
+#形容詞
+#
+#  adjective-main:
+#形容詞-自立
+#
+#  adjective-auxiliary:
+#形容詞-非自立
+#
+#  adjective-suffix:
+#形容詞-接尾
+#
+#####
+#  adverb: unclassified adverbs
+#副詞
+#
+#  adverb-misc: Words that can be segmented into one unit and where adnominal 
+#  modification is not possible.
+#  e.g. あいかわらず, 多分
+#副詞-一般
+#
+#  adverb-particle_conjunction: Adverbs that can be followed by の, は, に, 
+#  な, する, だ, etc.
+#  e.g. こんなに, そんなに, あんなに, なにか, なんでも
+#副詞-助詞類接続
+#
+#####
+#  adnominal: Words that only have noun-modifying forms.
+#  e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, 
+#       どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, 
+#       「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
+#連体詞
+#
+#####
+#  conjunction: Conjunctions that can occur independently.
+#  e.g. が, けれども, そして, じゃあ, それどころか
+接続詞
+#
+#####
+#  particle: unclassified particles.
+助詞
+#
+#  particle-case: case particles where the subclassification is undefined.
+助詞-格助詞
+#
+#  particle-case-misc: Case particles.
+#  e.g. から, が, で, と, に, へ, より, を, の, にて
+助詞-格助詞-一般
+#
+#  particle-case-quote: the "to" that appears after nouns, a person’s speech, 
+#  quotation marks, expressions of decisions from a meeting, reasons, judgements,
+#  conjectures, etc.
+#  e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
+助詞-格助詞-引用
+#
+#  particle-case-compound: Compounds of particles and verbs that mainly behave 
+#  like case particles.
+#  e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
+#       にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, 
+#       にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, 
+#       に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, 
+#       に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
+#       にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, 
+#       にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
+#       って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
+助詞-格助詞-連語
+#
+#  particle-conjunctive:
+#  e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, 
+#       ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, 
+#       (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
+助詞-接続助詞
+#
+#  particle-dependency:
+#  e.g. こそ, さえ, しか, すら, は, も, ぞ
+助詞-係助詞
+#
+#  particle-adverbial:
+#  e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, 
+#       (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
+#       (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, 
+#       (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
+#       ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
+助詞-副助詞
+#
+#  particle-interjective: particles with interjective grammatical roles.
+#  e.g. (松島) や
+助詞-間投助詞
+#
+#  particle-coordinate:
+#  e.g. と, たり, だの, だり, とか, なり, や, やら
+助詞-並立助詞
+#
+#  particle-final:
+#  e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, 
+#       ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
+助詞-終助詞
+#
+#  particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is 
+#  adverbial, conjunctive, or sentence final. For example:
+#       (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
+#       (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
+#           「(祈りが届いたせい) か (, 試験に合格した.)」
+#       (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
+#  e.g. か
+助詞-副助詞／並立助詞／終助詞
+#
+#  particle-adnominalizer: The "no" that attaches to nouns and modifies 
+#  non-inflectional words.
+助詞-連体化
+#
+#  particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs 
+#  that are giongo, giseigo, or gitaigo.
+#  e.g. に, と
+助詞-副詞化
+#
+#  particle-special: A particle that does not fit into one of the above classifications. 
+#  This includes particles that are used in Tanka, Haiku, and other poetry.
+#  e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
+助詞-特殊
+#
+#####
+#  auxiliary-verb:
+助動詞
+#
+#####
+#  interjection: Greetings and other exclamations.
+#  e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, 
+#       いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
+#感動詞
+#
+#####
+#  symbol: unclassified Symbols.
+記号
+#
+#  symbol-misc: A general symbol not in one of the categories below.
+#  e.g. [○◎@$〒→+]
+記号-一般
+#
+#  symbol-comma: Commas
+#  e.g. [,、]
+記号-読点
+#
+#  symbol-period: Periods and full stops.
+#  e.g. [.．。]
+記号-句点
+#
+#  symbol-space: Full-width whitespace.
+記号-空白
+#
+#  symbol-open_bracket:
+#  e.g. [({‘“『【]
+記号-括弧開
+#
+#  symbol-close_bracket:
+#  e.g. [)}’”』」】]
+記号-括弧閉
+#
+#  symbol-alphabetic:
+#記号-アルファベット
+#
+#####
+#  other: unclassified other
+#その他
+#
+#  other-interjection: Words that are hard to classify as noun-suffixes or 
+#  sentence-final particles.
+#  e.g. (だ)ァ
+その他-間投
+#
+#####
+#  filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
+#  e.g. あの, うんと, えと
+フィラー
+#
+#####
+#  non-verbal: non-verbal sound.
+非言語音
+#
+#####
+#  fragment:
+#語断片
+#
+#####
+#  unknown: unknown part of speech.
+#未知語
+#
+##### End of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/stopwords.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/stopwords.txt b/src/Lucene.Net.Analysis.Kuromoji/stopwords.txt
new file mode 100644
index 0000000..d4321be
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/stopwords.txt
@@ -0,0 +1,127 @@
+#
+# This file defines a stopword set for Japanese.
+#
+# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
+# Punctuation characters and frequent kanji have mostly been left out.  See LUCENE-3745
+# for frequency lists, etc. that can be useful for making your own set (if desired).
+#
+# Note that there is an overlap between these stopwords and the terms stopped when used
+# in combination with the JapanesePartOfSpeechStopFilter.  When editing this file, note
+# that comments are not allowed on the same line as stopwords.
+#
+# Also note that stopping is done in a case-insensitive manner.  Change your StopFilter
+# configuration if you need case-sensitive stopping.  Lastly, note that stopping is done
+# using the same character width as the entries in this file.  Since this StopFilter is
+# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
+# entries to be in half-width and your kana entries to be in full-width.
+#
+の
+に
+は
+を
+た
+が
+で
+て
+と
+し
+れ
+さ
+ある
+いる
+も
+する
+から
+な
+こと
+として
+い
+や
+れる
+など
+なっ
+ない
+この
+ため
+その
+あっ
+よう
+また
+もの
+という
+あり
+まで
+られ
+なる
+へ
+か
+だ
+これ
+によって
+により
+おり
+より
+による
+ず
+なり
+られる
+において
+ば
+なかっ
+なく
+しかし
+について
+せ
+だっ
+その後
+できる
+それ
+う
+ので
+なお
+のみ
+でき
+き
+つ
+における
+および
+いう
+さらに
+でも
+ら
+たり
+その他
+に関する
+たち
+ます
+ん
+なら
+に対して
+特に
+せる
+及び
+これら
+とき
+では
+にて
+ほか
+ながら
+うち
+そして
+とともに
+ただし
+かつて
+それぞれ
+または
+お
+ほど
+ものの
+に対する
+ほとんど
+と共に
+といった
+です
+とも
+ところ
+ここ
+##### End of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/Dict/TestTokenInfoDictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/Dict/TestTokenInfoDictionary.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/Dict/TestTokenInfoDictionary.cs
new file mode 100644
index 0000000..dd305a4
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/Dict/TestTokenInfoDictionary.cs
@@ -0,0 +1,114 @@
+using Lucene.Net.Analysis.Ja.Util;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Fst;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Ja.Dict
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class TestTokenInfoDictionary : LuceneTestCase
+    {
+        /// <summary>Enumerates the entire FST/lookup data and performs basic sanity checks on every term and word entry.</summary>
+        [Test]
+        public void TestEnumerateAll()
+        {
+            // numTerms and numWords are only reported in the VERBOSE output at the end (just for debugging)
+            int numTerms = 0;
+            int numWords = 0;
+            int lastWordId = -1;
+            int lastSourceId = -1;
+            TokenInfoDictionary tid = TokenInfoDictionary.GetInstance();
+            ConnectionCosts matrix = ConnectionCosts.GetInstance();
+            FST<long?> fst = tid.FST.InternalFST;
+            Int32sRefFSTEnum<long?> fstEnum = new Int32sRefFSTEnum<long?>(fst);
+            Int32sRefFSTEnum.InputOutput<long?> mapping;
+            Int32sRef scratch = new Int32sRef();
+            while ((mapping = fstEnum.Next()) != null)
+            {
+                numTerms++;
+                Int32sRef input = mapping.Input;
+                char[] chars = new char[input.Length];
+                for (int i = 0; i < chars.Length; i++)
+                {
+                    chars[i] = (char)input.Int32s[input.Offset + i]; // each FST input label is narrowed to one UTF-16 code unit
+                }
+                assertTrue(UnicodeUtil.ValidUTF16String(new string(chars)));
+
+                long? output = mapping.Output;
+                int sourceId = (int)output.Value;
+                // we walk in order: terms, sourceIds, and wordIds should always be strictly increasing
+                assertTrue(sourceId > lastSourceId);
+                lastSourceId = sourceId;
+                tid.LookupWordIds(sourceId, scratch);
+                for (int i = 0; i < scratch.Length; i++)
+                {
+                    numWords++;
+                    int wordId = scratch.Int32s[scratch.Offset + i];
+                    assertTrue(wordId > lastWordId);
+                    lastWordId = wordId;
+
+                    String baseForm = tid.GetBaseForm(wordId, chars, 0, chars.Length);
+                    assertTrue(baseForm == null || UnicodeUtil.ValidUTF16String(baseForm));
+
+                    String inflectionForm = tid.GetInflectionForm(wordId);
+                    assertTrue(inflectionForm == null || UnicodeUtil.ValidUTF16String(inflectionForm));
+                    if (inflectionForm != null)
+                    {
+                        // check that it's actually an ipadic inflection form
+                        assertNotNull(ToStringUtil.GetInflectedFormTranslation(inflectionForm));
+                    }
+
+                    String inflectionType = tid.GetInflectionType(wordId);
+                    assertTrue(inflectionType == null || UnicodeUtil.ValidUTF16String(inflectionType));
+                    if (inflectionType != null)
+                    {
+                        // check that it's actually an ipadic inflection type
+                        assertNotNull(ToStringUtil.GetInflectionTypeTranslation(inflectionType));
+                    }
+
+                    int leftId = tid.GetLeftId(wordId);
+                    int rightId = tid.GetRightId(wordId);
+
+                    matrix.Get(rightId, leftId); // result is discarded; presumably this only verifies the lookup does not throw
+
+                    tid.GetWordCost(wordId); // result is discarded as well
+
+                    String pos = tid.GetPartOfSpeech(wordId);
+                    assertNotNull(pos);
+                    assertTrue(UnicodeUtil.ValidUTF16String(pos));
+                    // check that it's actually an ipadic pos tag
+                    assertNotNull(ToStringUtil.GetPOSTranslation(pos));
+
+                    String pronunciation = tid.GetPronunciation(wordId, chars, 0, chars.Length);
+                    assertNotNull(pronunciation);
+                    assertTrue(UnicodeUtil.ValidUTF16String(pronunciation));
+
+                    String reading = tid.GetReading(wordId, chars, 0, chars.Length);
+                    assertNotNull(reading);
+                    assertTrue(UnicodeUtil.ValidUTF16String(reading));
+                }
+            }
+            if (VERBOSE)
+            {
+                Console.WriteLine("checked " + numTerms + " terms, " + numWords + " words.");
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/Dict/UserDictionaryTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/Dict/UserDictionaryTest.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/Dict/UserDictionaryTest.cs
new file mode 100644
index 0000000..f899476
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/Dict/UserDictionaryTest.cs
@@ -0,0 +1,90 @@
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Ja.Dict
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class UserDictionaryTest : LuceneTestCase
+    {
+        /// <summary>Looks up user-dictionary matches in two sentences and checks their count, offsets and lengths.</summary>
+        [Test]
+        public void TestLookup()
+        {
+            UserDictionary dictionary = TestJapaneseTokenizer.ReadDict();
+            String s = "関西国際空港に行った";
+            int[][] dictionaryEntryResult = dictionary.Lookup(s.toCharArray(), 0, s.Length);
+            // Length should be three 関西, 国際, 空港
+            assertEquals(3, dictionaryEntryResult.Length);
+
+            // Test positions
+            assertEquals(0, dictionaryEntryResult[0][1]); // index of 関西
+            assertEquals(2, dictionaryEntryResult[1][1]); // index of 国際
+            assertEquals(4, dictionaryEntryResult[2][1]); // index of 空港
+
+            // Test lengths
+            assertEquals(2, dictionaryEntryResult[0][2]); // length of 関西
+            assertEquals(2, dictionaryEntryResult[1][2]); // length of 国際
+            assertEquals(2, dictionaryEntryResult[2][2]); // length of 空港
+
+            s = "関西国際空港と関西国際空港に行った";
+            int[][] dictionaryEntryResult2 = dictionary.Lookup(s.toCharArray(), 0, s.Length);
+            // Length should be six: the three-segment compound occurs twice
+            assertEquals(6, dictionaryEntryResult2.Length);
+        }
+
+        /// <summary>Checks the readings returned for entries found by a user-dictionary lookup.</summary>
+        [Test]
+        public void TestReadings()
+        {
+            UserDictionary dictionary = TestJapaneseTokenizer.ReadDict();
+            char[] compound = "日本経済新聞".toCharArray();
+            int[][] result = dictionary.Lookup(compound, 0, compound.Length);
+            assertEquals(3, result.Length);
+            int wordIdNihon = result[0][0]; // wordId of 日本 in 日本経済新聞
+            assertEquals("ニホン", dictionary.GetReading(wordIdNihon, "日本".toCharArray(), 0, 2));
+
+            char[] name = "朝青龍".toCharArray();
+            result = dictionary.Lookup(name, 0, name.Length);
+            assertEquals(1, result.Length);
+            int wordIdAsashoryu = result[0][0]; // wordId for 朝青龍
+            assertEquals("アサショウリュウ", dictionary.GetReading(wordIdAsashoryu, name, 0, name.Length));
+        }
+
+        /// <summary>Checks the part-of-speech tag of an entry found by a user-dictionary lookup.</summary>
+        [Test]
+        public void TestPartOfSpeech()
+        {
+            UserDictionary dictionary = TestJapaneseTokenizer.ReadDict();
+            char[] compound = "日本経済新聞".toCharArray();
+            int[][] result = dictionary.Lookup(compound, 0, compound.Length);
+            assertEquals(3, result.Length);
+            int wordIdKeizai = result[1][0]; // wordId of 経済 in 日本経済新聞
+            assertEquals("カスタム名詞", dictionary.GetPartOfSpeech(wordIdKeizai));
+        }
+
+        /// <summary>Checks that TestJapaneseTokenizer.ReadDict() returns a dictionary instance.</summary>
+        [Test]
+        public void TestRead()
+        {
+            UserDictionary dictionary = TestJapaneseTokenizer.ReadDict();
+            assertNotNull(dictionary);
+        }
+    }
+}

[04/13] lucenenet git commit: Ported Lucene.Net.Analysis.Kuromoji + tests

Posted by ni...@apache.org.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseBaseFormFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseBaseFormFilter.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseBaseFormFilter.cs
new file mode 100644
index 0000000..609803f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseBaseFormFilter.cs
@@ -0,0 +1,84 @@
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Util;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class TestJapaneseBaseFormFilter : BaseTokenStreamTestCase
+    {
+        // Shared by the tests below: a JapaneseTokenizer whose output is wrapped in a JapaneseBaseFormFilter.
+        private readonly Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.DEFAULT_MODE);
+            return new TokenStreamComponents(tokenizer, new JapaneseBaseFormFilter(tokenizer));
+        });
+
+        [Test]
+        public void TestBasics()
+        {
+            AssertAnalyzesTo(analyzer, "それはまだ実験段階にあります",
+                new String[] { "それ", "は", "まだ", "実験", "段階", "に", "ある", "ます" }
+            );
+        }
+
+        [Test]
+        public void TestKeyword()
+        {
+            CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, AsSet("あり"), false); // marked as keyword below, so expected unchanged
+            Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer source = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.DEFAULT_MODE);
+                TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
+                return new TokenStreamComponents(source, new JapaneseBaseFormFilter(sink));
+            });
+
+            AssertAnalyzesTo(a, "それはまだ実験段階にあります",
+                new String[] { "それ", "は", "まだ", "実験", "段階", "に", "あり", "ます" }
+            );
+        }
+
+        [Test]
+        public void TestEnglish()
+        {
+            AssertAnalyzesTo(analyzer, "this atest",
+                new String[] { "this", "atest" });
+        }
+
+        [Test]
+        public void TestRandomStrings()
+        {
+            CheckRandomData(Random(), analyzer, AtLeast(1000));
+        }
+
+        [Test]
+        public void TestEmptyTerm()
+        {
+            Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer tokenizer = new KeywordTokenizer(reader);
+                return new TokenStreamComponents(tokenizer, new JapaneseBaseFormFilter(tokenizer));
+            });
+
+            CheckOneTerm(a, "", ""); // an empty term is expected to pass through as an empty term
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseBaseFormFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseBaseFormFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseBaseFormFilterFactory.cs
new file mode 100644
index 0000000..61a8b2e
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseBaseFormFilterFactory.cs
@@ -0,0 +1,60 @@
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Simple tests for <see cref="JapaneseBaseFormFilterFactory"/>
+    /// </summary>
+    public class TestJapaneseBaseFormFilterFactory : BaseTokenStreamTestCase
+    {
+        [Test]
+        public void TestBasics()
+        {
+            JapaneseTokenizerFactory tokenizers = new JapaneseTokenizerFactory(new Dictionary<string, string>());
+            tokenizers.Inform(new StringMockResourceLoader(""));
+            TokenStream tokens = tokenizers.Create(new StringReader("それはまだ実験段階にあります"));
+            JapaneseBaseFormFilterFactory filters = new JapaneseBaseFormFilterFactory(new Dictionary<string, string>());
+            TokenStream baseForms = filters.Create(tokens);
+            // The inflected verb in the input is expected to come out in its base form (ある).
+            string[] expected = { "それ", "は", "まだ", "実験", "段階", "に", "ある", "ます" };
+            AssertTokenStreamContents(baseForms, expected);
+        }
+
+        /// <summary>Test that bogus arguments result in exception</summary>
+        [Test]
+        public void TestBogusArguments()
+        {
+            Dictionary<string, string> bogusArgs = new Dictionary<string, string>();
+            bogusArgs["bogusArg"] = "bogusValue";
+            try
+            {
+                new JapaneseBaseFormFilterFactory(bogusArgs);
+                fail();
+            }
+            catch (ArgumentException e)
+            {
+                assertTrue(e.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseIterationMarkCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseIterationMarkCharFilter.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseIterationMarkCharFilter.cs
new file mode 100644
index 0000000..9db0903
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseIterationMarkCharFilter.cs
@@ -0,0 +1,241 @@
+using NUnit.Framework;
+using System;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class TestJapaneseIterationMarkCharFilter : BaseTokenStreamTestCase
+    {
+        private Analyzer keywordAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+            return new TokenStreamComponents(tokenizer, tokenizer);
+        },
+            initReader: (fieldName, reader) =>
+            {
+                return new JapaneseIterationMarkCharFilter(reader);
+            });
+
+
+        private Analyzer japaneseAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer tokenizer = new JapaneseTokenizer(reader, null, false, JapaneseTokenizerMode.SEARCH);
+            return new TokenStreamComponents(tokenizer, tokenizer);
+        },
+            initReader: (fieldName, reader) =>
+            {
+                return new JapaneseIterationMarkCharFilter(reader);
+            });
+
+        [Test]
+        public void TestKanji()
+        {
+            // Test single repetition
+            AssertAnalyzesTo(keywordAnalyzer, "時々", new String[] { "時時" });
+            AssertAnalyzesTo(japaneseAnalyzer, "時々", new String[] { "時時" });
+
+            // Test multiple repetitions
+            AssertAnalyzesTo(keywordAnalyzer, "馬鹿々々しい", new String[] { "馬鹿馬鹿しい" });
+            AssertAnalyzesTo(japaneseAnalyzer, "馬鹿々々しい", new String[] { "馬鹿馬鹿しい" });
+        }
+
+        [Test]
+        public void TestKatakana()
+        {
+            // Test single repetition
+            AssertAnalyzesTo(keywordAnalyzer, "ミスヾ", new String[] { "ミスズ" });
+            AssertAnalyzesTo(japaneseAnalyzer, "ミスヾ", new String[] { "ミ", "スズ" }); // Side effect
+        }
+
+        [Test]
+        public void testHiragana()
+        {
+            // Test single unvoiced iteration mark (ゝ)
+            AssertAnalyzesTo(keywordAnalyzer, "おゝの", new String[] { "おおの" });
+            AssertAnalyzesTo(japaneseAnalyzer, "おゝの", new String[] { "お", "おの" }); // Side effect
+
+            // Test single voiced iteration mark (ゞ) after an unvoiced character
+            AssertAnalyzesTo(keywordAnalyzer, "みすゞ", new String[] { "みすず" });
+            AssertAnalyzesTo(japaneseAnalyzer, "みすゞ", new String[] { "みすず" });
+
+            // Test single voiced iteration mark (ゞ) after an already-voiced character
+            AssertAnalyzesTo(keywordAnalyzer, "じゞ", new String[] { "じじ" });
+            AssertAnalyzesTo(japaneseAnalyzer, "じゞ", new String[] { "じじ" });
+
+            // Test single unvoiced iteration mark (ゝ) after a voiced character: the repeated character is unvoiced
+            AssertAnalyzesTo(keywordAnalyzer, "じゝ", new String[] { "じし" });
+            AssertAnalyzesTo(japaneseAnalyzer, "じゝ", new String[] { "じし" });
+
+            // Test multiple repetitions with voiced iteration
+            AssertAnalyzesTo(keywordAnalyzer, "ところゞゝゝ", new String[] { "ところどころ" });
+            AssertAnalyzesTo(japaneseAnalyzer, "ところゞゝゝ", new String[] { "ところどころ" });
+        }
+
+        [Test]
+        public void TestMalformed()
+        {
+            // We can't iterate c here, so emit as it is
+            AssertAnalyzesTo(keywordAnalyzer, "abcところゝゝゝゝ", new String[] { "abcところcところ" });
+
+            // We can't iterate c (with dakuten change) here, so emit it as-is
+            AssertAnalyzesTo(keywordAnalyzer, "abcところゞゝゝゝ", new String[] { "abcところcところ" });
+
+            // We can't iterate before beginning of stream, so emit characters as-is
+            AssertAnalyzesTo(keywordAnalyzer, "ところゞゝゝゞゝゞ", new String[] { "ところどころゞゝゞ" });
+
+            // We can't iterate an iteration mark only, so emit as-is
+            AssertAnalyzesTo(keywordAnalyzer, "々", new String[] { "々" });
+            AssertAnalyzesTo(keywordAnalyzer, "ゞ", new String[] { "ゞ" });
+            AssertAnalyzesTo(keywordAnalyzer, "ゞゝ", new String[] { "ゞゝ" });
+
+            // We can't iterate a full stop punctuation mark (because we use it as a flush marker)
+            AssertAnalyzesTo(keywordAnalyzer, "。ゝ", new String[] { "。ゝ" });
+            AssertAnalyzesTo(keywordAnalyzer, "。。ゝゝ", new String[] { "。。ゝゝ" });
+
+            // We can iterate other punctuation marks
+            AssertAnalyzesTo(keywordAnalyzer, "？ゝ", new String[] { "？？" });
+
+            // We can not get a dakuten variant of ぽ -- this is also a corner case test for inside()
+            AssertAnalyzesTo(keywordAnalyzer, "ねやぽゞつむぴ", new String[] { "ねやぽぽつむぴ" });
+            AssertAnalyzesTo(keywordAnalyzer, "ねやぽゝつむぴ", new String[] { "ねやぽぽつむぴ" });
+        }
+
+        [Test]
+        public void TestEmpty()
+        {
+            // Empty input stays empty
+            AssertAnalyzesTo(keywordAnalyzer, "", new String[0]);
+            AssertAnalyzesTo(japaneseAnalyzer, "", new String[0]);
+        }
+
+        [Test]
+        public void TestFullStop()
+        {
+            // Test full stops   
+            AssertAnalyzesTo(keywordAnalyzer, "。", new String[] { "。" });
+            AssertAnalyzesTo(keywordAnalyzer, "。。", new String[] { "。。" });
+            AssertAnalyzesTo(keywordAnalyzer, "。。。", new String[] { "。。。" });
+        }
+
+        [Test]
+        public void TestKanjiOnly()
+        {
+            // Test kanji only repetition marks
+            CharFilter filter = new JapaneseIterationMarkCharFilter(
+                new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。"),
+                true, // kanji
+                false // no kana
+            );
+            assertCharFilterEquals(filter, "時時、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。");
+        }
+
+        [Test]
+        public void TestKanaOnly()
+        {
+            // Test kana only repetition marks
+            CharFilter filter = new JapaneseIterationMarkCharFilter(
+                new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。"),
+                false, // no kanji
+                true   // kana
+            );
+            assertCharFilterEquals(filter, "時々、おおのさんと一緒にお寿司が食べたいです。abcところどころ。");
+        }
+
+        [Test]
+        public void TestNone()
+        {
+            // Test no repetition marks
+            CharFilter filter = new JapaneseIterationMarkCharFilter(
+                new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。"),
+                false, // no kanji
+                false  // no kana
+            );
+            assertCharFilterEquals(filter, "時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。");
+        }
+
+        [Test]
+        public void TestCombinations()
+        {
+            AssertAnalyzesTo(keywordAnalyzer, "時々、おゝのさんと一緒にお寿司を食べに行きます。",
+                new String[] { "時時、おおのさんと一緒にお寿司を食べに行きます。" }
+            );
+        }
+
+        [Test]
+        public void TestHiraganaCoverage()
+        {
+            // Test all hiragana iteration variants
+            String source = "かゝがゝきゝぎゝくゝぐゝけゝげゝこゝごゝさゝざゝしゝじゝすゝずゝせゝぜゝそゝぞゝたゝだゝちゝぢゝつゝづゝてゝでゝとゝどゝはゝばゝひゝびゝふゝぶゝへゝべゝほゝぼゝ";
+            String target = "かかがかききぎきくくぐくけけげけここごこささざさししじしすすずすせせぜせそそぞそたただたちちぢちつつづつててでてととどとははばはひひびひふふぶふへへべへほほぼほ";
+            AssertAnalyzesTo(keywordAnalyzer, source, new String[] { target });
+
+            // Test all hiragana iteration variants with dakuten
+            source = "かゞがゞきゞぎゞくゞぐゞけゞげゞこゞごゞさゞざゞしゞじゞすゞずゞせゞぜゞそゞぞゞたゞだゞちゞぢゞつゞづゞてゞでゞとゞどゞはゞばゞひゞびゞふゞぶゞへゞべゞほゞぼゞ";
+            target = "かがががきぎぎぎくぐぐぐけげげげこごごごさざざざしじじじすずずずせぜぜぜそぞぞぞただだだちぢぢぢつづづづてでででとどどどはばばばひびびびふぶぶぶへべべべほぼぼぼ";
+            AssertAnalyzesTo(keywordAnalyzer, source, new String[] { target });
+        }
+
+        [Test]
+        public void TestKatakanaCoverage()
+        {
+            // Test all katakana iteration variants
+            String source = "カヽガヽキヽギヽクヽグヽケヽゲヽコヽゴヽサヽザヽシヽジヽスヽズヽセヽゼヽソヽゾヽタヽダヽチヽヂヽツヽヅヽテヽデヽトヽドヽハヽバヽヒヽビヽフヽブヽヘヽベヽホヽボヽ";
+            String target = "カカガカキキギキククグクケケゲケココゴコササザサシシジシススズスセセゼセソソゾソタタダタチチヂチツツヅツテテデテトトドトハハバハヒヒビヒフフブフヘヘベヘホホボホ";
+            AssertAnalyzesTo(keywordAnalyzer, source, new String[] { target });
+
+            // Test all katakana iteration variants with dakuten
+            source = "カヾガヾキヾギヾクヾグヾケヾゲヾコヾゴヾサヾザヾシヾジヾスヾズヾセヾゼヾソヾゾヾタヾダヾチヾヂヾツヾヅヾテヾデヾトヾドヾハヾバヾヒヾビヾフヾブヾヘヾベヾホヾボヾ";
+            target = "カガガガキギギギクグググケゲゲゲコゴゴゴサザザザシジジジスズズズセゼゼゼソゾゾゾタダダダチヂヂヂツヅヅヅテデデデトドドドハバババヒビビビフブブブヘベベベホボボボ";
+            AssertAnalyzesTo(keywordAnalyzer, source, new String[] { target });
+        }
+
+        [Test]
+        public void TestRandomStrings()
+        {
+            // Blast some random strings through
+            CheckRandomData(Random(), keywordAnalyzer, 1000 * RANDOM_MULTIPLIER);
+        }
+
+        [Test]
+        public void TestRandomHugeStrings()
+        {
+            // Blast some huge random strings through; 8192 is presumably the max string length (confirm against CheckRandomData)
+            CheckRandomData(Random(), keywordAnalyzer, 100 * RANDOM_MULTIPLIER, 8192);
+        }
+
+        private void assertCharFilterEquals(CharFilter filter, String expected)
+        {
+            // Drain the filter completely and compare its output with the expected text.
+            assertEquals(expected, readFully(filter));
+        }
+
+        // Drains the reader one character at a time via Read() and returns everything it produced.
+        private String readFully(TextReader stream)
+        {
+            StringBuilder buffer = new StringBuilder();
+            for (int ch = stream.Read(); ch != -1; ch = stream.Read())
+            {
+                buffer.Append((char)ch);
+            }
+            return buffer.ToString();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseIterationMarkCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseIterationMarkCharFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseIterationMarkCharFilterFactory.cs
new file mode 100644
index 0000000..88f71a9
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseIterationMarkCharFilterFactory.cs
@@ -0,0 +1,108 @@
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Simple tests for <see cref="JapaneseIterationMarkCharFilterFactory"/>
+    /// </summary>
+    public class TestJapaneseIterationMarkCharFilterFactory : BaseTokenStreamTestCase
+    {
+        [Test]
+        public void TestIterationMarksWithKeywordTokenizer()
+        {
+            JapaneseIterationMarkCharFilterFactory charFilters = new JapaneseIterationMarkCharFilterFactory(new Dictionary<string, string>());
+            TextReader normalized = charFilters.Create(new StringReader("時々馬鹿々々しいところゞゝゝミスヾ"));
+            // With MockTokenizer.KEYWORD the whole normalized input is expected as a single token.
+            string[] expected = { "時時馬鹿馬鹿しいところどころミスズ" };
+            AssertTokenStreamContents(new MockTokenizer(normalized, MockTokenizer.KEYWORD, false), expected);
+        }
+
+        [Test]
+        public void TestIterationMarksWithJapaneseTokenizer()
+        {
+            JapaneseTokenizerFactory tokenizers = new JapaneseTokenizerFactory(new Dictionary<string, string>());
+            tokenizers.Inform(new StringMockResourceLoader(""));
+            JapaneseIterationMarkCharFilterFactory charFilters = new JapaneseIterationMarkCharFilterFactory(new Dictionary<string, string>());
+
+            // With no factory arguments, the expected tokens show both kanji and kana marks normalized.
+            TextReader normalized = charFilters.Create(new StringReader("時々馬鹿々々しいところゞゝゝミスヾ"));
+            TokenStream tokens = tokenizers.Create(normalized);
+            string[] expected = { "時時", "馬鹿馬鹿しい", "ところどころ", "ミ", "スズ" };
+            AssertTokenStreamContents(tokens, expected);
+        }
+
+        [Test]
+        public void TestKanjiOnlyIterationMarksWithJapaneseTokenizer()
+        {
+            JapaneseTokenizerFactory tokenizers = new JapaneseTokenizerFactory(new Dictionary<string, string>());
+            tokenizers.Inform(new StringMockResourceLoader(""));
+
+            IDictionary<string, string> filterArgs = new Dictionary<string, string>();
+            filterArgs["normalizeKanji"] = "true";
+            filterArgs["normalizeKana"] = "false";
+            JapaneseIterationMarkCharFilterFactory charFilters = new JapaneseIterationMarkCharFilterFactory(filterArgs);
+
+            TextReader normalized = charFilters.Create(new StringReader("時々馬鹿々々しいところゞゝゝミスヾ"));
+            TokenStream tokens = tokenizers.Create(normalized);
+            // Kana iteration marks are expected to stay as they are and come out as separate tokens.
+            string[] expected = { "時時", "馬鹿馬鹿しい", "ところ", "ゞ", "ゝ", "ゝ", "ミス", "ヾ" };
+            AssertTokenStreamContents(tokens, expected);
+        }
+
+        [Test]
+        public void TestKanaOnlyIterationMarksWithJapaneseTokenizer()
+        {
+            JapaneseTokenizerFactory tokenizers = new JapaneseTokenizerFactory(new Dictionary<string, string>());
+            tokenizers.Inform(new StringMockResourceLoader(""));
+
+            IDictionary<string, string> filterArgs = new Dictionary<string, string>();
+            filterArgs["normalizeKanji"] = "false";
+            filterArgs["normalizeKana"] = "true";
+            JapaneseIterationMarkCharFilterFactory charFilters = new JapaneseIterationMarkCharFilterFactory(filterArgs);
+
+            TextReader normalized = charFilters.Create(new StringReader("時々馬鹿々々しいところゞゝゝミスヾ"));
+            TokenStream tokens = tokenizers.Create(normalized);
+            // Kanji iteration marks are expected to stay as they are; kana marks are normalized.
+            string[] expected = { "時々", "馬鹿", "々", "々", "しい", "ところどころ", "ミ", "スズ" };
+            AssertTokenStreamContents(tokens, expected);
+        }
+
+        /// <summary>Test that bogus arguments result in exception</summary>
+        [Test]
+        public void TestBogusArguments()
+        {
+            Dictionary<string, string> bogusArgs = new Dictionary<string, string>();
+            bogusArgs["bogusArg"] = "bogusValue";
+            try
+            {
+                new JapaneseIterationMarkCharFilterFactory(bogusArgs);
+                fail();
+            }
+            catch (ArgumentException e)
+            {
+                assertTrue(e.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseKatakanaStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseKatakanaStemFilter.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseKatakanaStemFilter.cs
new file mode 100644
index 0000000..cbbc95b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseKatakanaStemFilter.cs
@@ -0,0 +1,100 @@
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Util;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests for <see cref="JapaneseKatakanaStemFilter"/>
+    /// </summary>
+    public class TestJapaneseKatakanaStemFilter : BaseTokenStreamTestCase
+    {
+        // A whitespace MockTokenizer is enough here because the filter under test
+        // does not really depend on Kuromoji.
+        private Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer whitespaceTokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            TokenStream stemmed = new JapaneseKatakanaStemFilter(whitespaceTokenizer);
+            return new TokenStreamComponents(whitespaceTokenizer, stemmed);
+        });
+
+        /// <summary>
+        /// Test a few common katakana spelling variations.
+        /// <para>
+        /// English translations are as follows:
+        /// <list type="bullet">
+        ///   <item><description>copy</description></item>
+        ///   <item><description>coffee</description></item>
+        ///   <item><description>taxi</description></item>
+        ///   <item><description>party</description></item>
+        ///   <item><description>party (without long sound)</description></item>
+        ///   <item><description>center</description></item>
+        /// </list>
+        /// Note that we remove a long sound in the case of "coffee" that is required.
+        /// </para>
+        /// </summary>
+        [Test]
+        public void TestStemVariants()
+        {
+            string input = "コピー コーヒー タクシー パーティー パーティ センター";
+            string[] expectedTerms = { "コピー", "コーヒ", "タクシ", "パーティ", "パーティ", "センタ" };
+            int[] expectedStartOffsets = { 0, 4, 9, 14, 20, 25 };
+            int[] expectedEndOffsets = { 3, 8, 13, 19, 24, 29 };
+
+            AssertAnalyzesTo(analyzer, input, expectedTerms, expectedStartOffsets, expectedEndOffsets);
+        }
+
+        /// <summary>
+        /// A term marked as a keyword by <see cref="SetKeywordMarkerFilter"/> must
+        /// come out of the stem filter unchanged.
+        /// </summary>
+        [Test]
+        public void TestKeyword()
+        {
+            CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, AsSet("コーヒー"), false);
+            Analyzer keywordAwareAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer whitespaceTokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                TokenStream marked = new SetKeywordMarkerFilter(whitespaceTokenizer, keywords);
+                TokenStream stemmed = new JapaneseKatakanaStemFilter(marked);
+                return new TokenStreamComponents(whitespaceTokenizer, stemmed);
+            });
+
+            CheckOneTerm(keywordAwareAnalyzer, "コーヒー", "コーヒー");
+        }
+
+        /// <summary>
+        /// Half-width katakana input is expected to pass through untouched.
+        /// </summary>
+        [Test]
+        public void TestUnsupportedHalfWidthVariants()
+        {
+            // The below result is expected since only full-width katakana is supported
+            string[] expectedTerms = { "ﾀｸｼｰ" };
+            AssertAnalyzesTo(analyzer, "ﾀｸｼｰ", expectedTerms);
+        }
+
+        /// <summary>
+        /// Pushes random data through the shared analyzer.
+        /// </summary>
+        [Test]
+        public void TestRandomData()
+        {
+            CheckRandomData(Random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+        }
+
+        /// <summary>
+        /// An empty term produced by a <see cref="KeywordTokenizer"/> must stay empty.
+        /// </summary>
+        [Test]
+        public void TestEmptyTerm()
+        {
+            Analyzer keywordAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer keywordTokenizer = new KeywordTokenizer(reader);
+                TokenStream stemmed = new JapaneseKatakanaStemFilter(keywordTokenizer);
+                return new TokenStreamComponents(keywordTokenizer, stemmed);
+            });
+
+            CheckOneTerm(keywordAnalyzer, "", "");
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseKatakanaStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseKatakanaStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseKatakanaStemFilterFactory.cs
new file mode 100644
index 0000000..49ac181
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseKatakanaStemFilterFactory.cs
@@ -0,0 +1,62 @@
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Simple tests for <see cref="JapaneseKatakanaStemFilterFactory"/>
+    /// </summary>
+    public class TestJapaneseKatakanaStemFilterFactory : BaseTokenStreamTestCase
+    {
+        /// <summary>
+        /// Tokenizes a sentence with a <see cref="JapaneseTokenizerFactory"/> built
+        /// from empty arguments and checks the terms coming out of the stem filter
+        /// created by the factory under test.
+        /// </summary>
+        [Test]
+        public void TestKatakanaStemming()
+        {
+            JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new Dictionary<String, String>());
+            tokenizerFactory.Inform(new StringMockResourceLoader(""));
+            TokenStream tokenStream = tokenizerFactory.Create(
+                new StringReader("明後日パーティーに行く予定がある。図書館で資料をコピーしました。")
+            );
+            JapaneseKatakanaStemFilterFactory filterFactory = new JapaneseKatakanaStemFilterFactory(new Dictionary<String, String>());
+            AssertTokenStreamContents(filterFactory.Create(tokenStream),
+                new String[]{ "明後日", "パーティ", "に", "行く", "予定", "が", "ある",   // パーティー should be stemmed
+                      "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"} // コピー should not be stemmed
+            );
+        }
+
+        /// <summary>
+        /// Test that bogus arguments result in exception
+        /// </summary>
+        [Test]
+        public void TestBogusArguments()
+        {
+            try
+            {
+                new JapaneseKatakanaStemFilterFactory(new Dictionary<String, String>() {
+                    { "bogusArg", "bogusValue" }
+                });
+                fail();
+            }
+            catch (ArgumentException expected)
+            {
+                // The message has to mention the unrecognized parameters.
+                assertTrue(expected.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapanesePartOfSpeechStopFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapanesePartOfSpeechStopFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapanesePartOfSpeechStopFilterFactory.cs
new file mode 100644
index 0000000..617a1b8
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapanesePartOfSpeechStopFilterFactory.cs
@@ -0,0 +1,70 @@
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Simple tests for <see cref="JapanesePartOfSpeechStopFilterFactory"/>
+    /// </summary>
+    public class TestJapanesePartOfSpeechStopFilterFactory : BaseTokenStreamTestCase
+    {
+        /// <summary>
+        /// Feeds a one-entry stop-tag list to the factory through a mock resource
+        /// loader and checks which terms remain after filtering.
+        /// </summary>
+        [Test]
+        public void TestBasics()
+        {
+            // Content served by the mock resource loader for the "tags" resource.
+            string stopTags =
+                "#  verb-main:\n" +
+                "動詞-自立\n";
+
+            var tokenizerFactory = new JapaneseTokenizerFactory(new Dictionary<string, string>());
+            tokenizerFactory.Inform(new StringMockResourceLoader(""));
+            TokenStream tokenized = tokenizerFactory.Create(new StringReader("私は制限スピードを超える。"));
+
+            IDictionary<string, string> filterArgs = new Dictionary<string, string>
+            {
+                { "luceneMatchVersion", TEST_VERSION_CURRENT.toString() },
+                { "tags", "stoptags.txt" }
+            };
+            var filterFactory = new JapanesePartOfSpeechStopFilterFactory(filterArgs);
+            filterFactory.Inform(new StringMockResourceLoader(stopTags));
+            TokenStream filtered = filterFactory.Create(tokenized);
+
+            string[] expectedTerms = { "私", "は", "制限", "スピード", "を" };
+            AssertTokenStreamContents(filtered, expectedTerms);
+        }
+
+        /// <summary>
+        /// Test that bogus arguments result in exception
+        /// </summary>
+        [Test]
+        public void TestBogusArguments()
+        {
+            try
+            {
+                var bogusArgs = new Dictionary<string, string>
+                {
+                    { "luceneMatchVersion", TEST_VERSION_CURRENT.toString() },
+                    { "bogusArg", "bogusValue" }
+                };
+                new JapanesePartOfSpeechStopFilterFactory(bogusArgs);
+                fail();
+            }
+            catch (ArgumentException e)
+            {
+                // The message has to mention the unrecognized parameters.
+                assertTrue(e.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseReadingFormFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseReadingFormFilter.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseReadingFormFilter.cs
new file mode 100644
index 0000000..141db33
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseReadingFormFilter.cs
@@ -0,0 +1,109 @@
+using Lucene.Net.Analysis.Cjk;
+using Lucene.Net.Analysis.Core;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests for <see cref="JapaneseReadingFormFilter"/>
+    /// </summary>
+    public class TestJapaneseReadingFormFilter : BaseTokenStreamTestCase
+    {
+        // SEARCH-mode tokenizer without a user dictionary; the reading form filter
+        // is created with its second argument false (expected output is katakana).
+        private Analyzer katakanaAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizerMode.SEARCH);
+            return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer, false));
+        });
+
+        // Same tokenizer setup; the reading form filter is created with its second
+        // argument true (expected output is romaji).
+        private Analyzer romajiAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizerMode.SEARCH);
+            return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer, true));
+        });
+
+
+        /// <summary>
+        /// Checks the katakana readings produced for a mixed kanji/kana sentence.
+        /// </summary>
+        [Test]
+        public void TestKatakanaReadings()
+        {
+            AssertAnalyzesTo(katakanaAnalyzer, "今夜はロバート先生と話した",
+                new String[] { "コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ" }
+            );
+        }
+
+        /// <summary>
+        /// Same sentence with the name written in half-width katakana; a
+        /// <see cref="CJKWidthFilter"/> sits between tokenizer and reading form
+        /// filter and the expected readings equal those of the full-width test.
+        /// </summary>
+        [Test]
+        public void TestKatakanaReadingsHalfWidth()
+        {
+            Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizerMode.SEARCH);
+                TokenStream stream = new CJKWidthFilter(tokenizer);
+                return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(stream, false));
+            });
+
+            AssertAnalyzesTo(a, "今夜はﾛﾊﾞｰﾄ先生と話した",
+                new String[] { "コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ" }
+            );
+        }
+
+        /// <summary>
+        /// Checks the romaji readings produced for a mixed kanji/kana sentence.
+        /// </summary>
+        [Test]
+        public void TestRomajiReadings()
+        {
+            AssertAnalyzesTo(romajiAnalyzer, "今夜はロバート先生と話した",
+                new String[] { "kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta" }
+            );
+        }
+
+        /// <summary>
+        /// Romaji counterpart of <see cref="TestKatakanaReadingsHalfWidth"/>: the
+        /// half-width input goes through a <see cref="CJKWidthFilter"/> first.
+        /// </summary>
+        [Test]
+        public void TestRomajiReadingsHalfWidth()
+        {
+            Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizerMode.SEARCH);
+                TokenStream stream = new CJKWidthFilter(tokenizer);
+                return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(stream, true));
+            });
+
+            AssertAnalyzesTo(a, "今夜はﾛﾊﾞｰﾄ先生と話した",
+                new String[] { "kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta" }
+            );
+        }
+
+        /// <summary>
+        /// Pushes random data through both analyzers, sharing one random source.
+        /// </summary>
+        [Test]
+        public void TestRandomData()
+        {
+            Random random = Random();
+            CheckRandomData(random, katakanaAnalyzer, 1000 * RANDOM_MULTIPLIER);
+            CheckRandomData(random, romajiAnalyzer, 1000 * RANDOM_MULTIPLIER);
+        }
+
+        /// <summary>
+        /// An empty term produced by a <see cref="KeywordTokenizer"/> must stay empty.
+        /// </summary>
+        [Test]
+        public void TestEmptyTerm()
+        {
+            Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer tokenizer = new KeywordTokenizer(reader);
+                return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer));
+            });
+
+            CheckOneTerm(a, "", "");
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseReadingFormFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseReadingFormFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseReadingFormFilterFactory.cs
new file mode 100644
index 0000000..053652b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseReadingFormFilterFactory.cs
@@ -0,0 +1,59 @@
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Simple tests for <see cref="JapaneseReadingFormFilterFactory"/>
+    /// </summary>
+    public class TestJapaneseReadingFormFilterFactory : BaseTokenStreamTestCase
+    {
+        /// <summary>
+        /// Builds tokenizer and filter factories from empty arguments and checks
+        /// the terms the filter emits for one sentence.
+        /// </summary>
+        [Test]
+        public void TestReadings()
+        {
+            var tokenizerFactory = new JapaneseTokenizerFactory(new Dictionary<string, string>());
+            tokenizerFactory.Inform(new StringMockResourceLoader(""));
+            TokenStream tokenized = tokenizerFactory.Create(new StringReader("先ほどベルリンから来ました。"));
+
+            var filterFactory = new JapaneseReadingFormFilterFactory(new Dictionary<string, string>());
+            string[] expectedTerms = { "サキ", "ホド", "ベルリン", "カラ", "キ", "マシ", "タ" };
+
+            AssertTokenStreamContents(filterFactory.Create(tokenized), expectedTerms);
+        }
+
+        /// <summary>
+        /// Test that bogus arguments result in exception
+        /// </summary>
+        [Test]
+        public void TestBogusArguments()
+        {
+            try
+            {
+                var bogusArgs = new Dictionary<string, string>
+                {
+                    { "bogusArg", "bogusValue" }
+                };
+                new JapaneseReadingFormFilterFactory(bogusArgs);
+                fail();
+            }
+            catch (ArgumentException e)
+            {
+                // The message has to mention the unrecognized parameters.
+                assertTrue(e.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseTokenizer.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseTokenizer.cs
new file mode 100644
index 0000000..0a1f819
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseTokenizer.cs
@@ -0,0 +1,846 @@
+using Lucene.Net.Analysis.Ja.Dict;
+using Lucene.Net.Analysis.Ja.TokenAttributes;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Attributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.IO;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class TestJapaneseTokenizer : BaseTokenStreamTestCase
+    {
+        /// <summary>
+        /// Opens the "userdict.txt" test resource as UTF-8 text and builds a
+        /// <see cref="UserDictionary"/> from it.
+        /// </summary>
+        /// <returns>The user dictionary built from the resource.</returns>
+        /// <exception cref="Exception">
+        /// The resource cannot be found, or an <see cref="IOException"/> occurred
+        /// (it is wrapped as the inner exception).
+        /// </exception>
+        public static UserDictionary ReadDict()
+        {
+            Stream dictStream = typeof(TestJapaneseTokenizer).getResourceAsStream("userdict.txt");
+            if (dictStream == null)
+            {
+                throw new Exception("Cannot find userdict.txt in test classpath!");
+            }
+
+            try
+            {
+                // The stream is disposed inside the try block, so an IOException
+                // raised while disposing is wrapped by the catch below as well.
+                using (dictStream)
+                {
+                    TextReader dictReader = new StreamReader(dictStream, Encoding.UTF8);
+                    return new UserDictionary(dictReader);
+                }
+            }
+            catch (IOException ioe)
+            {
+                throw new Exception(ioe.ToString(), ioe);
+            }
+        }
+
+        // SEARCH mode with the test user dictionary; third tokenizer argument is false.
+        private Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer source = new JapaneseTokenizer(reader, ReadDict(), false, JapaneseTokenizerMode.SEARCH);
+            return new TokenStreamComponents(source, source);
+        });
+
+
+        // NORMAL mode with the test user dictionary; third tokenizer argument is false.
+        private Analyzer analyzerNormal = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer source = new JapaneseTokenizer(reader, ReadDict(), false, JapaneseTokenizerMode.NORMAL);
+            return new TokenStreamComponents(source, source);
+        });
+
+        // SEARCH mode with the test user dictionary; third tokenizer argument is true
+        // (the "no punctuation" variant).
+        private Analyzer analyzerNoPunct = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer source = new JapaneseTokenizer(reader, ReadDict(), true, JapaneseTokenizerMode.SEARCH);
+            return new TokenStreamComponents(source, source);
+        });
+
+
+        // EXTENDED mode with the test user dictionary; third tokenizer argument is true
+        // (the "no punctuation" variant).
+        private Analyzer extendedModeAnalyzerNoPunct = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer source = new JapaneseTokenizer(reader, ReadDict(), true, JapaneseTokenizerMode.EXTENDED);
+            return new TokenStreamComponents(source, source);
+        });
+
+
+        /// <summary>
+        /// With the NORMAL-mode analyzer the whole katakana compound is expected
+        /// back as a single term.
+        /// </summary>
+        [Test]
+        public void TestNormalMode()
+        {
+            string[] expectedTerms = { "シニアソフトウェアエンジニア" };
+
+            AssertAnalyzesTo(analyzerNormal, "シニアソフトウェアエンジニア", expectedTerms);
+        }
+
+        /// <summary>
+        /// Checks terms and start/end offsets for a long sentence analyzed with
+        /// <c>analyzerNoPunct</c>.
+        /// </summary>
+        [Test]
+        public void TestDecomposition1()
+        {
+            string input = "本来は、貧困層の女性や子供に医療保護を提供するために創設された制度である、" +
+                           "アメリカ低所得者医療援助制度が、今日では、その予算の約３分の１を老人に費やしている。";
+
+            string[] expectedTerms =
+            {
+                "本来", "は",  "貧困", "層", "の", "女性", "や", "子供", "に", "医療", "保護", "を",
+                "提供", "する", "ため", "に", "創設", "さ", "れ", "た", "制度", "で", "ある",  "アメリカ",
+                "低", "所得", "者", "医療", "援助", "制度", "が",  "今日", "で", "は",  "その",
+                "予算", "の", "約", "３", "分の", "１", "を", "老人", "に", "費やし", "て", "いる"
+            };
+            int[] expectedStartOffsets =
+            {
+                0, 2, 4, 6, 7,  8, 10, 11, 13, 14, 16, 18, 19, 21, 23, 25, 26, 28, 29, 30,
+                31, 33, 34, 37, 41, 42, 44, 45, 47, 49, 51, 53, 55, 56, 58, 60,
+                62, 63, 64, 65, 67, 68, 69, 71, 72, 75, 76
+            };
+            int[] expectedEndOffsets =
+            {
+                2, 3, 6, 7, 8, 10, 11, 13, 14, 16, 18, 19, 21, 23, 25, 26, 28, 29, 30, 31,
+                33, 34, 36, 41, 42, 44, 45, 47, 49, 51, 52, 55, 56, 57, 60, 62,
+                63, 64, 65, 67, 68, 69, 71, 72, 75, 76, 78
+            };
+
+            AssertAnalyzesTo(analyzerNoPunct, input, expectedTerms, expectedStartOffsets, expectedEndOffsets);
+        }
+
+        /// <summary>
+        /// Checks terms and start/end offsets for a second sentence analyzed with
+        /// <c>analyzerNoPunct</c>.
+        /// </summary>
+        [Test]
+        public void TestDecomposition2()
+        {
+            string[] expectedTerms = { "麻薬", "の", "密売", "は", "根こそぎ", "絶やさ", "なけれ", "ば", "なら", "ない" };
+            int[] expectedStartOffsets = { 0, 2, 3, 5, 6, 10, 13, 16, 17, 19 };
+            int[] expectedEndOffsets = { 2, 3, 5, 6, 10, 13, 16, 17, 19, 21 };
+
+            AssertAnalyzesTo(analyzerNoPunct, "麻薬の密売は根こそぎ絶やさなければならない",
+                expectedTerms, expectedStartOffsets, expectedEndOffsets);
+        }
+
+        /// <summary>
+        /// Checks terms and start/end offsets for a sentence containing a middle
+        /// dot and a full stop, analyzed with <c>analyzerNoPunct</c>.
+        /// </summary>
+        [Test]
+        public void TestDecomposition3()
+        {
+            string[] expectedTerms = { "魔女", "狩", "大将", "マシュー", "ホプキンス" };
+            int[] expectedStartOffsets = { 0, 2, 3, 5, 10 };
+            int[] expectedEndOffsets = { 2, 3, 5, 9, 15 };
+
+            AssertAnalyzesTo(analyzerNoPunct, "魔女狩大将マシュー・ホプキンス。",
+                expectedTerms, expectedStartOffsets, expectedEndOffsets);
+        }
+
+        /// <summary>
+        /// Checks terms and start/end offsets for a short sentence analyzed with
+        /// the default SEARCH-mode <c>analyzer</c>.
+        /// </summary>
+        [Test]
+        public void TestDecomposition4()
+        {
+            string[] expectedTerms = { "これ", "は", "本", "で", "は", "ない" };
+            int[] expectedStartOffsets = { 0, 2, 3, 4, 5, 6 };
+            int[] expectedEndOffsets = { 2, 3, 4, 5, 6, 8 };
+
+            AssertAnalyzesTo(analyzer, "これは本ではない",
+                expectedTerms, expectedStartOffsets, expectedEndOffsets);
+        }
+
+        /* Note this is really a stupid test just to see if things aren't horribly slow.
+         * Ideally the test would actually fail instead of hanging...
+         */
+        [Test]
+        public void TestDecomposition5()
+        {
+            string repetitiveInput = "くよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよ";
+            TokenStream stream = analyzer.GetTokenStream("bogus", repetitiveInput);
+            try
+            {
+                stream.Reset();
+                // Drain the stream; the tokens themselves are not inspected.
+                while (stream.IncrementToken()) { }
+                stream.End();
+            }
+            finally
+            {
+                IOUtils.DisposeWhileHandlingException(stream);
+            }
+        }
+
+        /*
+          // NOTE: intentionally fails!  Just trying to debug this
+          // one input...
+        public void testDecomposition6() throws Exception {
+          assertAnalyzesTo(analyzer, "奈良先端科学技術大学院大学",
+            new String[] { "これ", "は", "本", "で", "は", "ない" },
+            new int[] { 0, 2, 3, 4, 5, 6 },
+            new int[] { 2, 3, 4, 5, 6, 8 }
+                           );
+        }
+        */
+
+        /// <summary>
+        /// Tests that sentence offset is incorporated into the resulting offsets
+        /// </summary>
+        [Test]
+        public void TestTwoSentences()
+        {
+            // NOTE: the Java test this was ported from carries a commented-out
+            // debugging snippet at this point that printed every term of a token
+            // stream; it is not part of the test and was never ported.
+
+            string twoSentences = "魔女狩大将マシュー・ホプキンス。 魔女狩大将マシュー・ホプキンス。";
+            string[] expectedTerms = { "魔女", "狩", "大将", "マシュー", "ホプキンス", "魔女", "狩", "大将", "マシュー", "ホプキンス" };
+            int[] expectedStartOffsets = { 0, 2, 3, 5, 10, 17, 19, 20, 22, 27 };
+            int[] expectedEndOffsets = { 2, 3, 5, 9, 15, 19, 20, 22, 26, 32 };
+
+            AssertAnalyzesTo(analyzerNoPunct, twoSentences, expectedTerms, expectedStartOffsets, expectedEndOffsets);
+        }
+
+        /// <summary>
+        /// blast some random strings through the analyzer
+        /// </summary>
+        [Test]
+        public void TestRandomStrings()
+        {
+            // Each analyzer gets its own Random() call.
+            CheckRandomData(
+                Random(),
+                analyzer,
+                1000 * RANDOM_MULTIPLIER);
+            CheckRandomData(
+                Random(),
+                analyzerNoPunct,
+                1000 * RANDOM_MULTIPLIER);
+        }
+
+        /// <summary>
+        /// blast some random large strings through the analyzer
+        /// </summary>
+        [Test]
+        public void TestRandomHugeStrings()
+        {
+            // One random source is shared by both runs.
+            Random rnd = Random();
+
+            CheckRandomData(
+                rnd,
+                analyzer,
+                100 * RANDOM_MULTIPLIER,
+                8192);
+            CheckRandomData(
+                rnd,
+                analyzerNoPunct,
+                100 * RANDOM_MULTIPLIER,
+                8192);
+        }
+
+        /// <summary>
+        /// Random large strings through a SEARCH-mode tokenizer followed by a
+        /// <see cref="MockGraphTokenFilter"/>.
+        /// </summary>
+        [Test]
+        public void TestRandomHugeStringsMockGraphAfter()
+        {
+            Random rnd = Random();
+
+            // Randomly inject graph tokens after JapaneseTokenizer:
+            Analyzer graphAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer source = new JapaneseTokenizer(reader, ReadDict(), false, JapaneseTokenizerMode.SEARCH);
+                // The filter takes a fresh Random() call, not the rnd captured above.
+                TokenStream withGraphTokens = new MockGraphTokenFilter(Random(), source);
+                return new TokenStreamComponents(source, withGraphTokens);
+            });
+
+            CheckRandomData(rnd, graphAnalyzer, 100 * RANDOM_MULTIPLIER, 8192);
+        }
+
+        /// <summary>
+        /// Drains the token stream of 100 random unicode strings (generated with
+        /// a length argument of 10000); the tokens are not inspected.
+        /// </summary>
+        [Test]
+        public void TestLargeDocReliability()
+        {
+            for (int round = 0; round < 100; round++)
+            {
+                string doc = TestUtil.RandomUnicodeString(Random(), 10000);
+                TokenStream stream = analyzer.GetTokenStream("foo", doc);
+                try
+                {
+                    stream.Reset();
+                    // Only consume the stream.
+                    while (stream.IncrementToken()) { }
+                    stream.End();
+                }
+                finally
+                {
+                    IOUtils.DisposeWhileHandlingException(stream);
+                }
+            }
+        }
+
+        /// <summary>
+        /// simple test for supplementary characters
+        /// </summary>
+        [Test]
+        public void TestSurrogates()
+        {
+            string[] expectedTerms = { "𩬅", "艱", "鍟", "䇹", "愯", "瀛" };
+
+            AssertAnalyzesTo(analyzer, "𩬅艱鍟䇹愯瀛", expectedTerms);
+        }
+
+        /// <summary>
+        /// random test ensuring we don't ever split supplementaries
+        /// </summary>
+        [Test]
+        public void TestSurrogates2()
+        {
+            int rounds = AtLeast(10000);
+            for (int round = 0; round < rounds; round++)
+            {
+                if (VERBOSE)
+                {
+                    Console.WriteLine("\nTEST: iter=" + round);
+                }
+
+                string text = TestUtil.RandomUnicodeString(Random(), 100);
+                TokenStream stream = analyzer.GetTokenStream("foo", text);
+                try
+                {
+                    ICharTermAttribute term = stream.AddAttribute<ICharTermAttribute>();
+                    stream.Reset();
+                    while (stream.IncrementToken())
+                    {
+                        // Every emitted term has to be well-formed UTF-16.
+                        assertTrue(UnicodeUtil.ValidUTF16String(term));
+                    }
+                    stream.End();
+                }
+                finally
+                {
+                    IOUtils.DisposeWhileHandlingException(stream);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Input made only of Japanese punctuation must yield no token from
+        /// <c>analyzerNoPunct</c>.
+        /// </summary>
+        [Test]
+        public void TestOnlyPunctuation()
+        {
+            TokenStream stream = analyzerNoPunct.GetTokenStream("foo", "。、。。");
+            try
+            {
+                stream.Reset();
+                bool hasToken = stream.IncrementToken();
+                assertFalse(hasToken);
+                stream.End();
+            }
+            finally
+            {
+                IOUtils.DisposeWhileHandlingException(stream);
+            }
+        }
+
+        /// <summary>
+        /// Input made only of ASCII dots must yield no token from
+        /// <c>extendedModeAnalyzerNoPunct</c>.
+        /// </summary>
+        [Test]
+        public void TestOnlyPunctuationExtended()
+        {
+            TokenStream stream = extendedModeAnalyzerNoPunct.GetTokenStream("foo", "......");
+            try
+            {
+                stream.Reset();
+                bool hasToken = stream.IncrementToken();
+                assertFalse(hasToken);
+                stream.End();
+            }
+            finally
+            {
+                IOUtils.DisposeWhileHandlingException(stream);
+            }
+        }
+
+        /// <summary>
+        /// Checks terms, offsets and the final offset reported for an input
+        /// without and with trailing spaces.
+        /// </summary>
+        [Test]
+        public void TestEnd()
+        {
+            // note: test is kinda silly since kuromoji emits punctuation tokens.
+            // but, when/if we filter these out it will be useful.
+            AssertTokenStreamContents(analyzerNoPunct.GetTokenStream("foo", "これは本ではない"),
+                new String[] { "これ", "は", "本", "で", "は", "ない" },
+                new int[] { 0, 2, 3, 4, 5, 6 },
+                new int[] { 2, 3, 4, 5, 6, 8 },
+                new int?(8)
+            );
+
+            // Same six tokens; only the final offset moves to 12 because of the four
+            // trailing spaces. The offset arrays hold exactly one entry per expected
+            // token (a surplus seventh 8/9 pair with no matching term was dropped).
+            AssertTokenStreamContents(analyzerNoPunct.GetTokenStream("foo", "これは本ではない    "),
+                new String[] { "これ", "は", "本", "で", "は", "ない" },
+                new int[] { 0, 2, 3, 4, 5, 6 },
+                new int[] { 2, 3, 4, 5, 6, 8 },
+                new int?(12)
+            );
+        }
+
+        /// <summary>
+        /// Checks terms, offsets and final offset for a sentence analyzed with the
+        /// user-dictionary-backed <c>analyzer</c>.
+        /// </summary>
+        [Test]
+        public void TestUserDict()
+        {
+            // Not a great test because w/o userdict.txt the
+            // segmentation is the same:
+            string[] expectedTerms = { "関西", "国際", "空港", "に", "行っ", "た" };
+            int[] expectedStartOffsets = { 0, 2, 4, 6, 7, 9 };
+            int[] expectedEndOffsets = { 2, 4, 6, 7, 9, 10 };
+
+            AssertTokenStreamContents(analyzer.GetTokenStream("foo", "関西国際空港に行った"),
+                expectedTerms, expectedStartOffsets, expectedEndOffsets, new int?(10));
+        }
+
+        [Test]
+        public void TestUserDict2()
+        {
+            // Stronger check: without the user dictionary this input is segmented differently.
+            TokenStream stream = analyzer.GetTokenStream("foo", "朝青龍");
+            AssertTokenStreamContents(stream,
+                new[] { "朝青龍" },
+                new[] { 0 },
+                new[] { 3 },
+                (int?)3);
+        }
+
+        [Test]
+        public void TestUserDict3()
+        {
+            // An entry that is broken into several tokens:
+            TokenStream stream = analyzer.GetTokenStream("foo", "abcd");
+            AssertTokenStreamContents(stream,
+                new[] { "a", "b", "cd" },
+                new[] { 0, 1, 2 },
+                new[] { 1, 2, 4 },
+                (int?)4);
+        }
+
+        // HMM: fails (segments as a/b/cd/efghij)... because the
+        // two paths have exactly equal paths (1 KNOWN + 1
+        // UNKNOWN) and we don't seem to favor longer KNOWN /
+        // shorter UNKNOWN matches:
+
+        /*
+        public void testUserDict4()  {
+          // Test entry that has another entry as prefix
+          assertTokenStreamContents(analyzer.tokenStream("foo", "abcdefghij"),
+                                    new String[] { "ab", "cd", "efg", "hij"  },
+                                    new int[] { 0, 2, 4, 7 },
+                                    new int[] { 2, 4, 7, 10 },
+                                    new int?(10)
+          );
+        }
+        */
+
+        [Test]
+        public void TestSegmentation()
+        {
+            // The Michelle Kwan sentence is left out -- UniDic segments Kwan as ク ワン
+            //   String input = "ミシェル・クワンが優勝しました。スペースステーションに行きます。うたがわしい。";
+            //   String[] surfaceForms = {
+            //        "ミシェル", "・", "クワン", "が", "優勝", "し", "まし", "た", "。",
+            //        "スペース", "ステーション", "に", "行き", "ます", "。",
+            //        "うたがわしい", "。"
+            //   };
+            string text = "スペースステーションに行きます。うたがわしい。";
+            string[] expectedSurfaceForms =
+            {
+                "スペース", "ステーション", "に", "行き", "ます", "。",
+                "うたがわしい", "。"
+            };
+
+            // Only the token terms are compared here.
+            AssertAnalyzesTo(analyzer, text, expectedSurfaceForms);
+        }
+
+        [Test]
+        public void TestLatticeToDot()
+        {
+            // Attach a GraphvizFormatter to the tokenizer; its output is inspected after analysis.
+            GraphvizFormatter gv2 = new GraphvizFormatter(ConnectionCosts.GetInstance());
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                JapaneseTokenizer tokenizer = new JapaneseTokenizer(reader, ReadDict(), false, JapaneseTokenizerMode.SEARCH)
+                {
+                    GraphvizFormatter = gv2
+                };
+                return new TokenStreamComponents(tokenizer, tokenizer);
+            });
+
+            String input = "スペースステーションに行きます。うたがわしい。";
+            String[] surfaceForms = {
+                "スペース", "ステーション", "に", "行き", "ます", "。",
+                "うたがわしい", "。"
+            };
+            AssertAnalyzesTo(analyzer, input, surfaceForms);
+
+            // The formatter output must contain the text "22.0". Search ordinally: the text is
+            // machine-generated, and IndexOf(string) on its own is a culture-sensitive search.
+            string formatted = gv2.Finish();
+            assertTrue(formatted.IndexOf("22.0", StringComparison.Ordinal) != -1);
+        }
+
+        private void assertReadings(String input, params String[] readings)
+        {
+            TokenStream stream = analyzer.GetTokenStream("ignored", input);
+            try
+            {
+                IReadingAttribute attribute = stream.AddAttribute<IReadingAttribute>();
+                stream.Reset();
+                for (int i = 0; i < readings.Length; i++)
+                {
+                    assertTrue(stream.IncrementToken()); // a token must exist for every expected reading
+                    assertEquals(readings[i], attribute.GetReading());
+                }
+                assertFalse(stream.IncrementToken()); // and the stream must be exhausted afterwards
+                stream.End();
+            }
+            finally
+            {
+                IOUtils.DisposeWhileHandlingException(stream);
+            }
+        }
+
+        private void assertPronunciations(String input, params String[] pronunciations)
+        {
+            TokenStream stream = analyzer.GetTokenStream("ignored", input);
+            try
+            {
+                IReadingAttribute attribute = stream.AddAttribute<IReadingAttribute>();
+                stream.Reset();
+                for (int i = 0; i < pronunciations.Length; i++)
+                {
+                    assertTrue(stream.IncrementToken()); // a token must exist for every expected pronunciation
+                    assertEquals(pronunciations[i], attribute.GetPronunciation());
+                }
+                assertFalse(stream.IncrementToken()); // and the stream must be exhausted afterwards
+                stream.End();
+            }
+            finally
+            {
+                IOUtils.DisposeWhileHandlingException(stream);
+            }
+        }
+
+        private void assertBaseForms(String input, params String[] baseForms)
+        {
+            TokenStream stream = analyzer.GetTokenStream("ignored", input);
+            try
+            {
+                IBaseFormAttribute attribute = stream.AddAttribute<IBaseFormAttribute>();
+                stream.Reset();
+                for (int i = 0; i < baseForms.Length; i++)
+                {
+                    assertTrue(stream.IncrementToken()); // a token must exist for every expected base form
+                    assertEquals(baseForms[i], attribute.GetBaseForm());
+                }
+                assertFalse(stream.IncrementToken()); // and the stream must be exhausted afterwards
+                stream.End();
+            }
+            finally
+            {
+                IOUtils.DisposeWhileHandlingException(stream);
+            }
+        }
+
+        private void assertInflectionTypes(String input, params String[] inflectionTypes)
+        {
+            TokenStream stream = analyzer.GetTokenStream("ignored", input);
+            try
+            {
+                IInflectionAttribute attribute = stream.AddAttribute<IInflectionAttribute>();
+                stream.Reset();
+                for (int i = 0; i < inflectionTypes.Length; i++)
+                {
+                    assertTrue(stream.IncrementToken()); // a token must exist for every expected inflection type
+                    assertEquals(inflectionTypes[i], attribute.GetInflectionType());
+                }
+                assertFalse(stream.IncrementToken()); // and the stream must be exhausted afterwards
+                stream.End();
+            }
+            finally
+            {
+                IOUtils.DisposeWhileHandlingException(stream);
+            }
+        }
+
+        private void assertInflectionForms(String input, params String[] inflectionForms)
+        {
+            TokenStream stream = analyzer.GetTokenStream("ignored", input);
+            try
+            {
+                IInflectionAttribute attribute = stream.AddAttribute<IInflectionAttribute>();
+                stream.Reset();
+                for (int i = 0; i < inflectionForms.Length; i++)
+                {
+                    assertTrue(stream.IncrementToken()); // a token must exist for every expected inflection form
+                    assertEquals(inflectionForms[i], attribute.GetInflectionForm());
+                }
+                assertFalse(stream.IncrementToken()); // and the stream must be exhausted afterwards
+                stream.End();
+            }
+            finally
+            {
+                IOUtils.DisposeWhileHandlingException(stream);
+            }
+        }
+
+        private void assertPartsOfSpeech(String input, params String[] partsOfSpeech)
+        {
+            TokenStream stream = analyzer.GetTokenStream("ignored", input);
+            try
+            {
+                IPartOfSpeechAttribute attribute = stream.AddAttribute<IPartOfSpeechAttribute>();
+                stream.Reset();
+                for (int i = 0; i < partsOfSpeech.Length; i++)
+                {
+                    assertTrue(stream.IncrementToken()); // a token must exist for every expected part of speech
+                    assertEquals(partsOfSpeech[i], attribute.GetPartOfSpeech());
+                }
+                assertFalse(stream.IncrementToken()); // and the stream must be exhausted afterwards
+                stream.End();
+            }
+            finally
+            {
+                IOUtils.DisposeWhileHandlingException(stream);
+            }
+        }
+
+        [Test]
+        public void TestReadings()
+        {
+            // Expected reading of each token, in order.
+            string[] expected =
+            {
+                "スシ", "ガ", "タベ", "タイ", "デス", "。"
+            };
+
+            assertReadings("寿司が食べたいです。", expected);
+        }
+
+        [Test]
+        public void TestReadings2()
+        {
+            // Expected reading of each token, in order.
+            string sentence = "多くの学生が試験に落ちた。";
+            string[] expected =
+            {
+                "オオク", "ノ", "ガクセイ",
+                "ガ", "シケン", "ニ",
+                "オチ", "タ", "。"
+            };
+
+            assertReadings(sentence, expected);
+        }
+
+        [Test]
+        public void TestPronunciations()
+        {
+            // Expected pronunciation of each token, in order.
+            string[] expected =
+            {
+                "スシ", "ガ", "タベ", "タイ", "デス", "。"
+            };
+
+            assertPronunciations("寿司が食べたいです。", expected);
+        }
+
+        [Test]
+        public void TestPronunciations2()
+        {
+            // pronunciation differs from reading here
+            // (the first token is expected as オーク, where TestReadings2 expects オオク)
+            string sentence = "多くの学生が試験に落ちた。";
+            string[] expected =
+            {
+                "オーク", "ノ", "ガクセイ",
+                "ガ", "シケン", "ニ",
+                "オチ", "タ", "。"
+            };
+
+            assertPronunciations(sentence, expected);
+        }
+
+        [Test]
+        public void TestBasicForms()
+        {
+            // Expected base form per token; null where no base form is expected.
+            string sentence = "それはまだ実験段階にあります。";
+            string[] expected =
+            {
+                null, null, null,
+                null, null, null,
+                "ある", null, null
+            };
+
+            assertBaseForms(sentence, expected);
+        }
+
+        [Test]
+        public void TestInflectionTypes()
+        {
+            // Expected inflection type per token; null where none is expected.
+            string sentence = "それはまだ実験段階にあります。";
+            string[] expected =
+            {
+                null, null, null,
+                null, null, null,
+                "五段・ラ行", "特殊・マス", null
+            };
+
+            assertInflectionTypes(sentence, expected);
+        }
+
+        [Test]
+        public void TestInflectionForms()
+        {
+            // Expected inflection form per token; null where none is expected.
+            string sentence = "それはまだ実験段階にあります。";
+            string[] expected =
+            {
+                null, null, null,
+                null, null, null,
+                "連用形", "基本形", null
+            };
+
+            assertInflectionForms(sentence, expected);
+        }
+
+        [Test]
+        public void TestPartOfSpeech()
+        {
+            // Expected part-of-speech tag of each token, in order.
+            string sentence = "それはまだ実験段階にあります。";
+            string[] expected =
+            {
+                "名詞-代名詞-一般", "助詞-係助詞", "副詞-助詞類接続",
+                "名詞-サ変接続", "名詞-一般", "助詞-格助詞-一般",
+                "動詞-自立", "助動詞", "記号-句点"
+            };
+
+            assertPartsOfSpeech(sentence, expected);
+        }
+
+        // TODO: the next 2 tests are no longer using the first/last word ids, maybe lookup the words and fix?
+        // do we have a possibility to actually lookup the first and last word from dictionary?
+        [Test]
+        public void TestYabottai()
+        {
+            string[] expected = { "やぼったい" }; // the whole input is expected back as one token
+            AssertAnalyzesTo(analyzer, "やぼったい", expected);
+        }
+
+        [Test]
+        public void TestTsukitosha()
+        {
+            string[] expected = { "突き通しゃ" }; // the whole input is expected back as one token
+            AssertAnalyzesTo(analyzer, "突き通しゃ", expected);
+        }
+
+        [Test]
+        public void TestBocchan()
+        {
+            doTestBocchan(1); // a single iteration
+        }
+
+        [Test, LongRunningTest]//@Nightly
+        [Ignore("This test takes a long time to run - do it manually")]
+        public void TestBocchanBig()
+        {
+            doTestBocchan(100); // same routine as TestBocchan, with 100 iterations
+        }
+
+        /*
+        public void testWikipedia()  {
+          final FileInputStream fis = new FileInputStream("/q/lucene/jawiki-20120220-pages-articles.xml");
+          final Reader r = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8));
+
+          final long startTimeNS = System.nanoTime();
+          boolean done = false;
+          long compoundCount = 0;
+          long nonCompoundCount = 0;
+          long netOffset = 0;
+          while (!done) {
+            final TokenStream ts = analyzer.tokenStream("ignored", r);
+            ts.reset();
+            final PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
+            final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
+            int count = 0;
+            while (true) {
+              if (!ts.incrementToken()) {
+                done = true;
+                break;
+              }
+              count++;
+              if (posIncAtt.getPositionIncrement() == 0) {
+                compoundCount++;
+              } else {
+                nonCompoundCount++;
+                if (nonCompoundCount % 1000000 == 0) {
+                  System.out.println(String.format("%.2f msec [pos=%d, %d, %d]",
+                                                   (System.nanoTime()-startTimeNS)/1000000.0,
+                                                   netOffset + offsetAtt.startOffset(),
+                                                   nonCompoundCount,
+                                                   compoundCount));
+                }
+              }
+              if (count == 100000000) {
+                System.out.println("  again...");
+                break;
+              }
+            }
+            ts.end();
+            netOffset += offsetAtt.endOffset();
+          }
+          System.out.println("compoundCount=" + compoundCount + " nonCompoundCount=" + nonCompoundCount);
+          r.close();
+        }
+        */
+
+
+        private void doTestBocchan(int numIterations)
+        {
+            // Only the first line of the embedded "bocchan.utf-8" resource is analyzed. The using
+            // block releases the reader (and its stream) even if ReadLine() throws.
+            String line;
+            using (TextReader reader = new StreamReader(
+                this.GetType().getResourceAsStream("bocchan.utf-8"), Encoding.UTF8))
+            {
+                line = reader.ReadLine();
+            }
+
+            if (VERBOSE)
+            {
+                Console.WriteLine("Test for Bocchan without pre-splitting sentences");
+            }
+
+            /*
+            if (numIterations > 1) {
+              // warmup
+              for (int i = 0; i < numIterations; i++) {
+                final TokenStream ts = analyzer.tokenStream("ignored", line);
+                ts.reset();
+                while(ts.incrementToken());
+              }
+            }
+            */
+
+            long totalStart = Environment.TickCount;
+            for (int i = 0; i < numIterations; i++)
+            {
+                consumeAllTokens(line);
+            }
+            String[] sentences = Regex.Split(line, "、|。");
+            if (VERBOSE)
+            {
+                Console.WriteLine("Total time : " + (Environment.TickCount - totalStart));
+                Console.WriteLine("Test for Bocchan with pre-splitting sentences (" + sentences.Length + " sentences)");
+            }
+            totalStart = Environment.TickCount;
+            for (int i = 0; i < numIterations; i++)
+            {
+                foreach (String sentence in sentences)
+                {
+                    consumeAllTokens(sentence);
+                }
+            }
+            if (VERBOSE)
+            {
+                Console.WriteLine("Total time : " + (Environment.TickCount - totalStart));
+            }
+        }
+
+        // Runs the shared analyzer over the given text, pulling every token and discarding it.
+        private void consumeAllTokens(String text)
+        {
+            TokenStream ts = analyzer.GetTokenStream("ignored", text);
+            try
+            {
+                ts.Reset();
+                while (ts.IncrementToken()) { /* tokens are intentionally discarded */ }
+                ts.End();
+            }
+            finally
+            {
+                IOUtils.DisposeWhileHandlingException(ts);
+            }
+        }
+
+        [Test]
+        public void TestWithPunctuation()
+        {
+            string[] expectedTerms = { "羽田", "空港" };
+            int[] expectedPosIncrs = { 1, 1 };
+            AssertAnalyzesTo(analyzerNoPunct, "羽田。空港", expectedTerms, expectedPosIncrs);
+        }
+
+        [Test]
+        public void TestCompoundOverPunctuation()
+        {
+            string[] expectedTerms = { "d", "ε", "ε", "ϢϏΎϷΞͺ", "羽田" };
+            int[] expectedPosIncrs = { 1, 1, 1, 1, 1 };
+            int[] expectedPosLengths = { 1, 1, 1, 1, 1 };
+            AssertAnalyzesToPositions(analyzerNoPunct, "dεε϶ϢϏΎϷΞͺ羽田", expectedTerms, expectedPosIncrs, expectedPosLengths);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseTokenizerFactory.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseTokenizerFactory.cs
new file mode 100644
index 0000000..91fbf16
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseTokenizerFactory.cs
@@ -0,0 +1,134 @@
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Simple tests for <see cref="JapaneseTokenizerFactory"/>
+    /// </summary>
+    public class TestJapaneseTokenizerFactory : BaseTokenStreamTestCase
+    {
+        /// <summary>
+        /// Builds a factory from <paramref name="args"/>, informs it with a
+        /// <see cref="StringMockResourceLoader"/> over <paramref name="resourceText"/>,
+        /// and returns the stream the factory creates for <paramref name="input"/>.
+        /// </summary>
+        private static TokenStream CreateStream(IDictionary<String, String> args, String resourceText, String input)
+        {
+            JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(args);
+            tokenizerFactory.Inform(new StringMockResourceLoader(resourceText));
+            return tokenizerFactory.Create(new StringReader(input));
+        }
+
+        /// <summary>
+        /// No arguments: checks the terms together with their start and end offsets
+        /// </summary>
+        [Test]
+        public void TestSimple()
+        {
+            TokenStream stream = CreateStream(new Dictionary<String, String>(), "", "これは本ではない");
+            AssertTokenStreamContents(stream,
+                new[] { "これ", "は", "本", "で", "は", "ない" },
+                new[] { 0, 2, 3, 4, 5, 6 },
+                new[] { 2, 3, 4, 5, 6, 8 });
+        }
+
+        /// <summary>
+        /// Test that search mode is enabled and working by default
+        /// </summary>
+        [Test]
+        public void TestDefaults()
+        {
+            TokenStream stream = CreateStream(new Dictionary<String, String>(), "", "シニアソフトウェアエンジニア");
+            AssertTokenStreamContents(stream,
+                new[] { "シニア", "シニアソフトウェアエンジニア", "ソフトウェア", "エンジニア" });
+        }
+
+        /// <summary>
+        /// Test mode parameter: specifying normal mode
+        /// </summary>
+        [Test]
+        public void TestMode()
+        {
+            IDictionary<String, String> args = new Dictionary<String, String>();
+            args.Put("mode", "normal");
+            TokenStream stream = CreateStream(args, "", "シニアソフトウェアエンジニア");
+            AssertTokenStreamContents(stream,
+                new[] { "シニアソフトウェアエンジニア" });
+        }
+
+        /// <summary>
+        /// Test user dictionary
+        /// </summary>
+        [Test]
+        public void TestUserDict()
+        {
+            String userDict =
+                "# Custom segmentation for long entries\n" +
+                "日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞\n" +
+                "関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,テスト名詞\n" +
+                "# Custom reading for sumo wrestler\n" +
+                "朝青龍,朝青龍,アサショウリュウ,カスタム人名\n";
+            IDictionary<String, String> args = new Dictionary<String, String>();
+            args.Put("userDictionary", "userdict.txt");
+            TokenStream stream = CreateStream(args, userDict, "関西国際空港に行った");
+            AssertTokenStreamContents(stream,
+                new[] { "関西", "国際", "空港", "に", "行っ", "た" });
+        }
+
+        /// <summary>
+        /// Test preserving punctuation
+        /// </summary>
+        [Test]
+        public void TestPreservePunctuation()
+        {
+            IDictionary<String, String> args = new Dictionary<String, String>();
+            args.Put("discardPunctuation", "false");
+            TokenStream stream = CreateStream(args, "",
+                "今ノルウェーにいますが、来週の頭日本に戻ります。楽しみにしています！お寿司が食べたいな。。。");
+            AssertTokenStreamContents(stream,
+                new[] {
+                    "今", "ノルウェー", "に", "い", "ます", "が", "、",
+                    "来週", "の", "頭", "日本", "に", "戻り", "ます", "。",
+                    "楽しみ", "に", "し", "て", "い", "ます", "！",
+                    "お", "寿司", "が", "食べ", "たい", "な", "。", "。", "。"
+                });
+        }
+
+        /// <summary>Test that bogus arguments result in exception</summary>
+        [Test]
+        public void TestBogusArguments()
+        {
+            try
+            {
+                new JapaneseTokenizerFactory(
+                    new Dictionary<String, String> { { "bogusArg", "bogusValue" } });
+                fail();
+            }
+            catch (ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/TestSearchMode.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/TestSearchMode.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestSearchMode.cs
new file mode 100644
index 0000000..bb9fdae
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestSearchMode.cs
@@ -0,0 +1,92 @@
+using NUnit.Framework;
+using System;
+using System.IO;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class TestSearchMode : BaseTokenStreamTestCase
+    {
+        private static readonly String SEGMENTATION_FILENAME = "search-segmentation-tests.txt";
+        private readonly Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizerMode.SEARCH);
+            return new TokenStreamComponents(tokenizer, tokenizer);
+        });
+
+        /// <summary>
+        /// Test search mode segmentation (expected terms and position increments only)
+        /// </summary>
+        [Test]
+        public void TestSearchSegmentation()
+        {
+            Stream @is = typeof(TestSearchMode).getResourceAsStream(SEGMENTATION_FILENAME);
+            if (@is == null)
+            {
+                throw new FileNotFoundException("Cannot find " + SEGMENTATION_FILENAME + " in test classpath");
+            }
+            try
+            {
+                TextReader reader = new StreamReader(@is, Encoding.UTF8);
+                String line = null;
+                int lineNumber = 0;
+
+                while ((line = reader.ReadLine()) != null)
+                {
+                    lineNumber++;
+                    // Remove comments
+                    line = Regex.Replace(line, "#.*$", "");
+                    // Skip empty lines or comment lines
+                    if (line.Trim() == string.Empty)
+                    {
+                        continue;
+                    }
+                    if (VERBOSE)
+                    {
+                        Console.WriteLine("Line no. " + lineNumber + ": " + line);
+                    }
+                    String[] fields = new Regex("\t").Split(line, 2); // source text TAB expected tokens
+                    String sourceText = fields[0];
+                    // TrimEnd: Regex.Split (unlike Java's String.split) keeps a trailing empty string.
+                    String[] expectedTokens = Regex.Split(fields[1].TrimEnd(), "\\s+");
+                    int[] expectedPosIncrs = new int[expectedTokens.Length];
+                    for (int tokIDX = 0; tokIDX < expectedTokens.Length; tokIDX++)
+                    {
+                        if (expectedTokens[tokIDX].EndsWith("/0", StringComparison.Ordinal))
+                        {
+                            // "/0" marks a token expected with position increment 0 (the array default).
+                            expectedTokens[tokIDX] = Regex.Replace(expectedTokens[tokIDX], "/0", "");
+                        }
+                        else
+                        {
+                            expectedPosIncrs[tokIDX] = 1;
+                        }
+                    }
+                    AssertAnalyzesTo(analyzer, sourceText, expectedTokens, expectedPosIncrs);
+                }
+            }
+            finally
+            {
+                @is.Dispose();
+            }
+        }
+    }
+}

[05/13] lucenenet git commit: Ported Lucene.Net.Analysis.Kuromoji + tests

Posted by ni...@apache.org.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseAnalyzer.cs
new file mode 100644
index 0000000..98d97a0
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestJapaneseAnalyzer.cs
@@ -0,0 +1,229 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Test Kuromoji Japanese morphological analyzer
+    /// </summary>
+    public class TestJapaneseAnalyzer : BaseTokenStreamTestCase
+    {
+        /// <summary>
+        /// Smoke test: building a <see cref="JapaneseAnalyzer"/> with only the
+        /// version argument must succeed. Per the original note, this fails with a
+        /// null-reference error (NPE in the Java original) when the stopwords file
+        /// is missing.
+        /// </summary>
+        [Test]
+        public void TestResourcesAvailable()
+        {
+            // Construction alone is the check; the instance is intentionally unused.
+            JapaneseAnalyzer analyzer = new JapaneseAnalyzer(TEST_VERSION_CURRENT);
+        }
+
+        /// <summary>
+        /// Analyzes an example sentence and checks removal of particles etc. by
+        /// part of speech, lemmatization to the basic form, and that offsets and
+        /// position increments are correct.
+        /// </summary>
+        [Test]
+        public void TestBasics()
+        {
+            JapaneseAnalyzer analyzer = new JapaneseAnalyzer(TEST_VERSION_CURRENT);
+
+            // Expected output, one entry per surviving token.
+            String[] expectedTerms = { "多く", "学生", "試験", "落ちる" };
+            int[] expectedStartOffsets = { 0, 3, 6, 9 };
+            int[] expectedEndOffsets = { 2, 5, 8, 11 };
+            // Increments of 2 mark positions where a token was removed in between.
+            int[] expectedPosIncrements = { 1, 2, 2, 2 };
+
+            AssertAnalyzesTo(analyzer, "多くの学生が試験に落ちた。",
+                expectedTerms,
+                expectedStartOffsets,
+                expectedEndOffsets,
+                expectedPosIncrements);
+        }
+
+        /// <summary>
+        /// Tests that search mode is enabled and working by default. The first
+        /// cases run against an analyzer built explicitly with
+        /// <see cref="JapaneseTokenizerMode.SEARCH"/>; the last case uses an
+        /// analyzer constructed with only the version argument.
+        /// Each case checks terms, position increments and position lengths.
+        /// </summary>
+        [Test]
+        public void TestDecomposition()
+        {
+            Analyzer searchAnalyzer = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, JapaneseTokenizerMode.SEARCH,
+                JapaneseAnalyzer.GetDefaultStopSet(),
+                JapaneseAnalyzer.GetDefaultStopTags());
+
+            // Senior software engineer:
+            String[] seniorEngineerTerms =
+            {
+                "シニア",
+                "シニアソフトウェアエンジニア", // zero pos inc
+                "ソフトウェア",
+                "エンジニア"
+            };
+            int[] seniorEngineerPosIncrs = { 1, 0, 1, 1 };
+            int[] seniorEngineerPosLengths = { 1, 3, 1, 1 };
+            AssertAnalyzesToPositions(searchAnalyzer, "シニアソフトウェアエンジニア",
+                seniorEngineerTerms, seniorEngineerPosIncrs, seniorEngineerPosLengths);
+
+            // Senior project manager: also tests katakana spelling variation stemming
+            String[] projectManagerTerms =
+            {
+                "シニア",
+                "シニアプロジェクトマネージャ", // trailing ー removed by stemming, zero pos inc
+                "プロジェクト",
+                "マネージャ" // trailing ー removed by stemming
+            };
+            int[] projectManagerPosIncrs = { 1, 0, 1, 1 };
+            int[] projectManagerPosLengths = { 1, 3, 1, 1 };
+            AssertAnalyzesToPositions(searchAnalyzer, "シニアプロジェクトマネージャー",
+                projectManagerTerms, projectManagerPosIncrs, projectManagerPosLengths);
+
+            // Kansai International Airport:
+            String[] kansaiAirportTerms =
+            {
+                "関西",
+                "関西国際空港", // zero pos inc
+                "国際",
+                "空港"
+            };
+            int[] kansaiAirportPosIncrs = { 1, 0, 1, 1 };
+            int[] kansaiAirportPosLengths = { 1, 3, 1, 1 };
+            AssertAnalyzesToPositions(searchAnalyzer, "関西国際空港",
+                kansaiAirportTerms, kansaiAirportPosIncrs, kansaiAirportPosLengths);
+
+            // Konika Minolta Holdings; not quite the right
+            // segmentation (see LUCENE-3726):
+            String[] konicaMinoltaTerms =
+            {
+                "コニカ",
+                "コニカミノルタホールディングス", // zero pos inc
+                "ミノルタ",
+                "ホールディングス"
+            };
+            int[] konicaMinoltaPosIncrs = { 1, 0, 1, 1 };
+            int[] konicaMinoltaPosLengths = { 1, 3, 1, 1 };
+            AssertAnalyzesToPositions(searchAnalyzer, "コニカミノルタホールディングス",
+                konicaMinoltaTerms, konicaMinoltaPosIncrs, konicaMinoltaPosLengths);
+
+            // Narita Airport
+            String[] naritaAirportTerms = { "成田", "成田空港", "空港" };
+            int[] naritaAirportPosIncrs = { 1, 0, 1 };
+            int[] naritaAirportPosLengths = { 1, 2, 1 };
+            AssertAnalyzesToPositions(searchAnalyzer, "成田空港",
+                naritaAirportTerms, naritaAirportPosIncrs, naritaAirportPosLengths);
+
+            // Kyoto University Baseball Club (analyzer built with default settings)
+            String[] kyotoBaseballTerms = { "京都大", "学", "硬式", "野球", "部" };
+            int[] kyotoBaseballPosIncrs = { 1, 1, 1, 1, 1 };
+            int[] kyotoBaseballPosLengths = { 1, 1, 1, 1, 1 };
+            AssertAnalyzesToPositions(new JapaneseAnalyzer(TEST_VERSION_CURRENT), "京都大学硬式野球部",
+                kyotoBaseballTerms, kyotoBaseballPosIncrs, kyotoBaseballPosLengths);
+            // toDotFile(searchAnalyzer, "成田空港", "/mnt/scratch/out.dot");
+        }
+
+
+        /// <summary>
+        /// Blasts random strings against a search-mode analyzer.
+        /// </summary>
+        [Test]
+        public void TestRandom()
+        {
+            // Obtain the random source first so the call order matches the original.
+            Random rnd = Random();
+            Analyzer searchAnalyzer = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, JapaneseTokenizerMode.SEARCH,
+                JapaneseAnalyzer.GetDefaultStopSet(),
+                JapaneseAnalyzer.GetDefaultStopTags());
+
+            int iterations = AtLeast(10000);
+            CheckRandomData(rnd, searchAnalyzer, iterations);
+        }
+
+        /// <summary>
+        /// Blasts some random large strings through a search-mode analyzer.
+        /// </summary>
+        [Test]
+        public void TestRandomHugeStrings()
+        {
+            Random rnd = Random();
+            Analyzer searchAnalyzer = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, JapaneseTokenizerMode.SEARCH,
+                JapaneseAnalyzer.GetDefaultStopSet(),
+                JapaneseAnalyzer.GetDefaultStopTags());
+
+            // Iteration count scales with the configured multiplier; 8192 is the
+            // size limit passed through to CheckRandomData.
+            int iterations = 100 * RANDOM_MULTIPLIER;
+            const int maxLength = 8192;
+            CheckRandomData(rnd, searchAnalyzer, iterations, maxLength);
+        }
+
+        /// <summary>
+        /// Copied from TestJapaneseTokenizer to make sure that passing a user
+        /// dictionary to the analyzer works. Exercises an entry that breaks
+        /// into multiple tokens.
+        /// </summary>
+        [Test]
+        public void TestUserDict3()
+        {
+            // Expected tokens for "abcd", with their start/end offsets and the final offset.
+            String[] expectedTerms = { "a", "b", "cd" };
+            int[] expectedStartOffsets = { 0, 1, 2 };
+            int[] expectedEndOffsets = { 1, 2, 4 };
+            int? expectedFinalOffset = 4;
+
+            // Search-mode analyzer using the dictionary from TestJapaneseTokenizer.ReadDict().
+            Analyzer userDictAnalyzer = new JapaneseAnalyzer(TEST_VERSION_CURRENT, TestJapaneseTokenizer.ReadDict(),
+                JapaneseTokenizerMode.SEARCH,
+                JapaneseAnalyzer.GetDefaultStopSet(),
+                JapaneseAnalyzer.GetDefaultStopTags());
+
+            AssertTokenStreamContents(userDictAnalyzer.GetTokenStream("foo", "abcd"),
+                expectedTerms,
+                expectedStartOffsets,
+                expectedEndOffsets,
+                expectedFinalOffset);
+        }
+
+        // LUCENE-3897: this string (found by running all jawiki XML through
+        // JapaneseAnalyzer) caused an ArrayIndexOutOfBoundsException (AIOOBE)
+        [Test]
+        public void TestCuriousString()
+        {
+            Random random = Random();
+            String s = "&lt;li&gt;06:26 2004年3月21日 [[利用者:Kzhr|Kzhr]] &quot;お菓子な家族&quot; を削除しました &lt;em&gt;&lt;nowiki&gt;(即時削除: 悪戯。内容: &amp;#39;ＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫ
 ＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫ�
 ��ＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫ�
 �ＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫＫ
 ＫＫＫＫＫＫＫＫＫＫＫＫＫＫ&amp;#39;)&lt;/nowiki&gt;&lt;/em&gt;&lt;/li&gt;";
+            Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, JapaneseTokenizerMode.SEARCH,
+                                                    JapaneseAnalyzer.GetDefaultStopSet(),
+                                                    JapaneseAnalyzer.GetDefaultStopTags());
+            CheckAnalysisConsistency(random, a, random.nextBoolean(), s);
+        }
+
+        // LUCENE-3897: this string (found by
+        // TestRandomHugeStrings) tripped an assertion
+        [Test]
+        public void TestAnotherCuriousString()
+        {
+            Random random = Random();
+            String s = "《〔〘〝」〩〄〯』〴〷〦〯〹】〰。　〆。〡〢〲〆〤〫〱　〜々〲〿〄》〃】〚〗〪〓〨々〮〹〟〯〫』」〨〒〜〃〃〡　〸〜〱〆〿「〱〳。〷〆〃〷〇〛〥〒〖〪〕〦〚〉〷〼〣〒。〕〣〻〒〻〼〔〸〿〖〖〆々〭《〟〚〇〕〸〲〄〿〙」〞〖〪〬〤【〵〘〃々〦〮〠〦〛〲〝〿〽〓〺〷〛》〛『》〇　〽〄〱〙〥〠』〨〉〨〔」》〮〥〽〔〰〄〶】〠〶〨〔々『。〞〙〮》【　〯〦〯〩〩〈〿〫〘〒》』〾〰〰〼〒「〝〰〱〞〹〔〪〭、〬〴【』〧〩】〈。〧〤〢〨〶〄〴〡。〪〭〞〷〣〘〳〄〬〙『　「」【〮〯〔〱〬〴〵〭〬〚〱、〚〣、〚〓〮、〚々】〼〿〦〫〛〲〆〕々。〨〩〇〫〵『『〣〮〜〫〃】〡〯』〆〫〺〻〬〺、〗】〓〕〶〇〞〬。」〃〮〇〞〷〰〲】〆〻。〬〻〄〜〃〲〺〧〘〇〈、〃〚〇〉�
 ��〬〣〨〮〆〴〻〒〖〄〒〳〗〶、〙「　〫〚《〩〆〱〡【〶』【〆〫】〢》〔。〵〴〽々〱〖〳〶〱《〈〒』『〝〘【〈〢〝〠〣「〤〆〢〈〚〕〿〣々〢〹〉〡　〷《〤〴『々〉〤〬《』々〾〔〚〆〔〴〪〩〸〦』〉〃　《〼〇〆〾〛〿」〧〝〽〘〠〻【〰〨〥《〯〝〩〩〱〇〳々〚〉〔『〹〳〳』〲『〣」〯〓【々〮〥〃〿〳〞〦〦〶〓〬〛〬〈〈〠『〜〥〒〯〜〜〹〲【〓〪《々〗〚〇〜〄〦『々〃〒〇〖〢〉〹〮〩〽『》〵〔】〣〮】〧、〇〰〒】《〈〆々〾〣【〾〲〘〧『〇〲〼〕〙「〪〆〚々〦〯〵〇〤〆〡」〪》〼』〴〶〪】『〲〢〭〬〈〠〮〽〓〔〧〖」〃〴〬』〣〝〯〣〴『〉〖〄〇〄〰〇〃〤、〤》〔〴〯〫〠〝〷〞〩〛〛〳々〓〟〜〛〜〃　〃〛「、』》》々〢〱〢〸〹〙〃〶〇〮〼」〔〶【〙〮々〣　〵〱〈〡〙�
 �、〶〘【〘〄〔『〸〵〫〱〈〙〜〸〩〗〷》〽〃〔〕〡〨〆〺〒〧〴〢〈〯〶〼〚〈〪〘〢〘〶〿〾〹〆〉」〠〴〭〉〡〮〫〸〸〦〟〣」〩〶』《〔〨〫〉〃〚〈〡〾〈〵【〼《〴〸〜〜〓《〡〶〫〉〫〼〱〿〢々〩〡〘〓〛〞〖々〢〩「々〦〣】〤〫〼〚〴〡〠〕〴〭。〟「〞》』「、〛〕〤々〈〺〃〸】〶〽〒〓〙》〶〬〸〧〜〲〬〰〪。〞〒【〭〇〢〝〧〰〹〾》〖「〹」〶〕〜〘〿〩〙〺〡〓〆〵〪〬〨〷〯〃】〤〤〞〸》〈〹〖〲〣〬〲〯〗〉〮「〼〨〓々。〭〆〶〩【〦〿》〩〻〢〔〤〟〯【〷〻〚〟」〗《〓〛。〰〃〭〯〘〣》〩〩〆」【〼〡】〳〿〫〳〼〺〶『〟〧』〳〲〔『〦「〳〃〫〷《〟〶〻〪〆〗〲〮〄〨〻』〟〜〓〣〴〓〉、〷〄〝〭〻〲〽〼〥〒〚〬〙〦〓〢〦〒〄。〛〩〿〹「〶〬〖〬〾〭〽〕〲〤〕〚〢
 〪〸〠〸〠〓〇〄〽〖】〵〮〦〲〸〉〫〢〹〼〗〱〮〢」〝〽〹「〭〥「〠〆〕〃〫々【『〣〝々〧〒〒】〬〖〘〗〰〭〢〠〨〖〶〒》〪〺〇〡》〦〝〾〴〸〓〛〟〞」〓〜。〡』々》〃〼』〨〾】〜〵々〥【〉〾〭〹〯〔〢〺〳〹〜〢〄〵〵〱。〯〹〺〣〭〉〛々〧〫々〛〪。〠〰〖〒〦〠〩〣〾〺〫〬、》「〚〫〲〸〶〧〞〯〨」】【〚〲『〽〡》〘〣〒〕〸『〼〘〿〘〽〤〿〶〫〆〾〔〃〱〫〱〧、　〒〰。〜〸〇〜〔〉〡〬〿〝〼〉〷、〠〘〉』〥〫〧〕》》〡〻〨〲〔〠〮】〰〮」〧〬《〦〼〽〵〭「〷〮〈〴〔〭、〣〔〥〱〔」〄〘》〡〣》〴〙〜〖〬〺〯々〟〗〥〥【〝〨〝〽〼〚。〙』〤〬〞〜〣〮〬〳〽〦〩　〶」〠〄〳〠〇〜〒〶〱々〠『〡』〭〰》〴〉〫〬〒《〽『〉〳〵〄〨〮〔〭〞』〡〚〩〦、〠【〓〯〬〦〛�
 ��〉〜〻『〗〫〞〩〃〼〿〡〕〯　〸』》〼〮〆」〼〪〇〭〣〗〓〻〧」〙〳〱〥〳、〓〕〮〫》〧〃《〣』〹〬〣〶〡〾〙〮〕〶〧《　〨〇〺〳〉《《〕〜〰〱〕〛「〞〩　〓〢〄〣〼〢〽〇〛〟〖〘〳〤〫〡〫〬〦〘〪〶〝《』〜〕〝】〄〡〳〹々〯【〝〝〇〔〹〿〥〄〚〒〻『〺〮〇〲〒〾〙〞〉】〉〪〫〴〒〔〨〮〰〻〷〿〥〮〼〹〩〱〞〈〴〦〰〞【〻〾、〵〻〛〮、〻《〘〱〫〾〄〄〙、〔〖〘　》〻〧〦〃〣〬〰〗】〸〵。〄。〷〄〸〟〰〓、【〖〰〢〾〘〆》〜〶〻》〔〛』〦〩〷〴〃〴〫〱《「〖々〖〒〡〞。〱〡〖〤〫〇〜〒〴〯》〪〶》〘〨》》【〵〹」〤〯《〦〶〯〃〧〙〩」「〤】》々〣〱〯〞〰〢々〵〷〺〾〺〜〜〚〣〿〩〰《〄『〧《〜　〷〓〺〦々〚〨「〧〮《〥〸〞【〡〩〩〱〴〗〙〿『〇〭〖　〹〥�
 �『〗〛〯〷〃〽〝《〳《〡】〄々〱〆〯〦。〒『〡》〨〃〦」〬〄〬〔〭〫〼〲】』〗〔〼〴〹〠」〺〬〺〔々》〾〿「〺〖〤》〴〶〣〚〒【〤〄】〹〺〟〃〜、】〪〚〯〢〹、〶〖〭】〾〠」〉〆〾々〯〈〙〞〶〩】〺〟〫〽〫〸〵〛〙〃「〰〫〓』〻「〦〤〖〺〇。〨〟　〦〙〘〨〸〒〣〈〩〜〧〾〒〕〤　〇〴〮〝〈〿〢〴〟〷〭〴】〽〇〟〦〬〶〲。〫〸〮〝〆〸〄〣〦〲〢〇〫〻〹〕〶〥〖。〨〬【〥〽〓〵〯、〒〉〳〘〧〼〆〹〉〾〬〽】〹〲《〜〨〟〡〪〱〃〓〬〜〧〝〸、〢〝〦』〝〸》】〩〡〉〫〛〇〢〖〔〠〹〧〕〨〃〙「〲〗〙『〛。。《〸〔〾〧〉〠』〡〼〄〨〲〥〼〠〻　「〸〩〟〷【〮〜〧〿〾〜〈。〣〰〪〘〮〴〨【〩〜〟〟〼〻〦〝「〺〝〄〵〝〲〃〨〺〫〜　〮」〡』〜〿、〪々〕〫〃〒〔〛〻〲〹》
 【〚〣〯《〢〙〕〝〾〙〭〄〕〗〄〪〵〃〘〺〻〤〟〢〻〆〥〝】〠〬〧〾〮々〪〓』〷』〿〕〒〽、〷〉」〨〨　〄〽〾「〧〴〜〢〮〚〆〣《〬〺〟〥〼〛〆〓〚々〇々〈〉〗〨〳々〣〭〯〉【〩〮〺〪』〭〚〉〦、〃〘〦〮」〴〆〴〔〴〜「〠〴【〰「〫〳〟〾〶〉〨〲〚〩〷〄《〄〝〈。〧〟〳〃〹々〃〄〭〬〰、〥〬〸〱〉〩〴《〔【〠〳〪〧〫〽〓〭】〧」〮〒〸〤。〩』〭〖〛〭〯〨〕〞〮〞〬〹〺々〽〡〷〪〶「〹〯〝々〭〠〼〰「〒〉」」〡〆〜〾〪〾》〇〙『〚〿〽】〛〮〶〚」《〔〔〣、〄〗〩〭〠」〠〰〞《〸〧〺〰」『〾〯〃〓〓〩〣〚【〜〭　〝〨〗〷〒《〫〝〶〘〣〿〜〱〾〨〥〘〃〳〆〇〈〜〲〪〡〶〭〤『〝〖〷〦〾〬〟〠〳〻、」【〣『〺〞〴〳「〵〺〨々〩〰〢〧〣〃『〹」〉〓〘〦〣〄〕〞〵〧〜�
 ��〴〠〱》〮〬〄〶〆〬」〘　〺。〲。　〾〷〕〛〣〾〗、〭』〭〧〝。〮」々『〻〒〣「〳〩〪〝〒〥〻〘〰〼〭〆〷〭「」〚〔〬〃〝〮〩〪〽〱。〯〯〰〨〿〷「々》【〴〧〻〰、〶〡〹〩〡〺〲〼。〩〿〯』〟〴〼〦〤〙〢〩〔〲〆〗〲《〟〤〬〷〧〫〧〗〞〣〚〚〧〭〮〛〲〮々〩〩〕〬々　〥〸、〢。〿〵〺〤〲〝　〥》々〰」〮〩〛〛』々『〹〞〃〃々〚【〱。。〹〨〿〻〣〞〨〈〤〼〃〻〩〶『　〲〷〗〭〓〯〯〝〃〾〕〻〖〱《「〹〣〦　」〵〄〮〚〖〞〪〼〖〙〵》〰〃〘。【〺〖〄〪〝〭〆〬〚〬〨〽」〕」「〜〤〯〷〇〝〠〆〫〼」〭〤〓〔々〆〵〷〪〭「〆〖〇〽〄〄「〿〵〷〤〿〮〫〻〢〕〝〪〳〸〘〡〡〞〮〻】「〝〷〘〾〒〺〉〨〰』〳〓〃〒〪【〗〯「〧々〷〩〝』〭〇〒、〯〈〦〣〆〬〸〚〈〉〔〥《�
 �》〹〢〺〤〝々《　〲〘】〚』〚『〯〼〾〱〵〻、〪〟〸〯〽〴〱。〵〪〫々〳〢〣〕〓〩「〘〜〨〻】〿〹〭〛〛〔〹〻。〛〴〤〢〮、〸〷〃〜〜〝〔』〳〮〹』〽　〶〛　〤」〢。〣〖〶〯〥》〢〸〸〤〕〣〘》〧〦」〘〻〶〾〮〢〳〝〙〻〦〺〇〲〢〔〘〶〩〖】〟〓〰〇〮』〦〄々〹〻〄〄〽〷〱〫〒〛〉〿〓〯〺〪〲〢〼、〫〬「〩《〡〕〻』〭〜〗〫】　、〈〙〉【〓〣〫〜〈『〾】〴〪〫〬〶〪〚〬〿〪〮〴〒〶〡〄〉〿〼〜〵』〻〼〢「〵。』〸〖〙〧』〾〖〙《〉〪〦〙〔〈　〤〫〦〸『〗「〣『〓『〡〨〖〥〭《〢〠〦〞〸〞〚〢〕〙〖〾、〩」「〗〈〰〸〤〴〶〤〙《々〆〽〆【『〬〝〸『〙、〪〻』〓〹々〥〲〉〪〹〫〓〽〪〩〷、〹〺〩「〞『】〡々〡「〇〉〺〶〾〔々、〾〻〪〣〖〡〩〥〾〯】〤〰》『〲【
 〙〭〽〛〿々〟　〢〃〼〕〫〲』〪【〛〯】〔〕〥』」〳』《〖〥〳〄〢【〩〮〫〥〝〯〿〟、〣〹〪〔〱」〖〢〘〛〾〾〜〒〝〷〚〳〣〝〟《】「》〻『〢〄〄　》〱〓〞〛〢〆〺〉《〃〭〙〻〞〷〩〹〥〦〫〞〄〇〯〽〱〼〴〾〕〸〿〱〪〨〟〠々〪〸〔〵〆」〔〖〴〝〟】《〥　』《〒〄〣〿〞》】〃〹〲〛〬。】〒〓〹〴〿〥〴〲〖〧〝〪〶「〕〔〞〜〸〬〒〽【〸〻〢【〱」〪〉　〉〘〪〻〴〞』〯〰〾〥〓〼〻〕』〠〃〟〩〛〔【〻〡「〘〔　〲々〻〚〈〪〱〾〷〗》〯〞【〩　『〕〪〈々〞〞〳〘〵〃〼〨々〇〞〈〹〧〢〃〢〮〆〈〤〘〬〟〽〩、。〲々〺〠〳〸　〸〹〥、〯〒〈〃〠〰〙〪〯〬〖」〔〹〔〘〶〾〨〿〛〈〡〯〕〶〲、〷【》〷〆》〄《】〒〓〔〼〉〒〢〄〢〓〩〰〃〔。〵〙』。〷〼〩」〒〒〇〳〆〘〯�
 ��〢〠】〱〱《〤〽〢〄〤〵〪」〆〘〲〪〼〷〕〚〙〢〳〲〦〥〃〩〳〤「〽〽〇〖〶〶〾〴〰〷〨『〟〲〬〵〲〸〩〕〣〫】〝〇〡〿〳〦【〧〖〓〫〿〣〖【〙【〵々〶『〵〟〠〇》』〲〹〾〰〰〙〚〖〳〞〄『〤〠〇、。〆〧　〒〘〱〾〢〲〵〇〼〼〪〤〵〓〴〦〵〛『〘』〭〔〯「〓」〤〼〱〒〤〶〰〖〬〻【〳〵〡〃〙〠〩〛〝〰》〸《』〦　〿〭〵〺〈〓〵〛【〴〤〒。〪〷〢〡』〒　〄〚々〽〄〔〖々。〪〠〢〸〮〵〾「〉〙〆〘〣《〩〽〃〄「〕〢〻〉〷〛〫〇〪〯〵《〷〚〕〇〟〔〛『〣〆』〸〶々〳〾《〭〯〫〄〔〗〨〺〛〴》〻〫〨〢〜〱〇〦〘〺〉〫〇〧〿〶〲〉〖〵〦〹〷〳〈〞』』〡〓〺〟〡〭、〧〺〺〱〟」「〠〡「〠〬〰〙〹〥〙〓〶〫〳〣〢〳〇〫』々〡〚「〮〘〭〹〶〸〮【〔〚〆〆〼〷〖〒〤〲〕〳�
 �〾〇〔〹「〦〔〹々〘〲〔〃〡〪〚〪〗〉〓〫〦　」〟〳〛〉〹〺〭〲〆〙〽「〱〘〿〡〭〦】、〠〰〢〥《〶《』〶〃〼〄〪〥〙【。』　〸〳〈〇〡〩〮〃〹〘〧〿〱々〿〭》〶】〥〜、〬〖〠〢。〾〫〔〩〥〫〓」〲〢〛〶〚〡〈」〡〦〼〰〔〾〨〔〄〹〬〛〃〇〸。〽〠〵〙〠【〶〉〇〗〔〒〒〇〉〧《〗〮〟〡《〉〻〧〝〓〱〧〜〘〦【〸〘〩、〵〡〈〴〭『〉〕〴〯〰〘〳。〴〃〙〨〄〈〿〒〕〯」〼〳〤〱『〓〚〛〳〣〳〺〒、〃〚〲〲』〳〃〷〵〹〷〾〞〞〹〣〢〨〵》〽〮〒〹〻〨〜」〇〗〨〙〒〃〆〫〹〉〻。〄〔〧〝〒〷〛〲〧〪〺〚〼〳〒〙〫〢「〲】〾〬〸〷〿〉　〱〛〙〰〜〧》〳〉】】〮〈〗〢〧〟〠〣　〭〵〰「〼〽〭〫〘〴〲〺〾〘「〮〯〩〛〤〣〥〛】〱。〬〴〞〰〣〻〵〹〤〇〴〮〦》『〨〛『〡〞
 〥〄〠〸〽、』〣〬〢〠〯〰〄〇〆》〇〵『〹〛、〃〟〙〡〷〿〩〥〶〲、〓〧〲〪〚〕〞〢〗〖〝〰〵〪〴〿』〱〮〳〫】《〹〟〻〝〓〦〣〞〤〷〠　〃〈〛「〱〿〆〟〟〉〤〿〈〦〥〻《〻〼〇〢〰〢〒【〞〆「〢〻〧〇。〭々【〪『〪〓】〹〃〄〹〕〝〒〚》〔。〕〶〺《』〦〗〳〰〶〨〔々〖《〰〷〛〩〨』〤〻』《。〵〱〼〵〛〝〧〼〡〶〧〾〯〷〞　〧〛。〦〛〪〕〶〱〆〤〻〹〱〰〖〨〥〚々。〾〽〦〸】〛〇〫》〃々々〲《『『〱〘〲〕〦〇〱〈〞」】〞〨〖〚〽〧〥〬〰〬〥〇〡〼〴〲〠〭〖〵〯。〙〪〖〯〄〾〮〗『〉〴〩　〃〚〲〠〨〟』〖〜〥〛〉〲〃〃〮〳〡〳〩　〄々〞〨〛〪「〼〓〭　々〵〘〄〝〭〖〰〾〬〆〸。〻〓〞『〥〗〪〚〇〞〭〤〉〼〬〕【〤】〥〡〛〖〕〆〧〝〧〺《〭〈〸〪〆〺〸〝〭〇、�
 ��〯〴〸〤、〾〒〉〰〛〷〽〶〿〰〫〜〔〪〱〇』』〰〨〞〓〽〻〻〙〪〠〨〗〓〣〨〾。〜〃〘〚〇〟〖〗【〥。〡「〾『〙〢〦〹〩〟〠〘】〾〒〈〔『〣〲〉〉〻『〇〦〽〿〼〾〚〮〧。〷〰〲〧《〹」〕々〻〤〗〦』《〳〢、」〤　〰〞〠〨〾〪〯〮〳〒　〰〜〼〕〰〳〄》〤「〗〽〇〠〔〝〚〽〣》〷〙】〶〷〆』〇》〓〄〤〸【〡】〾『〯〶、〵〨》〼〗〨〶〉〄〭〓〲〞〝〞〡〻〷〻〣〰〈〽〮》〲《。〸〶〿〣〞。】〡」〖〩〔〜〘》〤〦」〓『〨　〹〞『〛〡〧〬〃〷】〔〫〆〤〻〲〆〯〞〿〧〔『　。〓〳〝〢〿〮〯〵〮〨》〴〒」〒〷〻〶〡〽〤〭〽〰》〾〹。〳〔〹》〴〕〫」〹〜〻〦〳〕〺〘〴』〈〽〲〃〔〙。【」〇〨》〨〴〿〄〻」〉》「〚〺〿〹〤』〄〸】〴〩々【「〫〒】〄〛　【〰〯〶〰〉【〮」〦「�
 �》〴〙〿〽〄〔〈〓〻〠」〚〯　〷〄〆〳《〸　〴〕〩〸〾〡〼〻〆〬〶〞〓〤〩〿〪〻、〠「〲〓〠〦〛〢〓〇〸〡〬〱】〞〫〽〖〉、〻〿〈〸〓〹〯〰〸〰〘〫　〬〬〽〦〣〾々〥《〰〗〩〰〞】〪〆〷〳〚《〯〱〓〣〭〗。〬「〢〸〮〤〓〖〾〣』〘〳〕【〼〤〔」〵〰〪〡〲。〤〃』〧〙【。〝「〶〻〝〖〢〡〿〓〖〺〝〈々】〈、『〼〣》〔〪《〢〣、〛〕〙〞〭〿〧〵」〴〾〯〫「〨〕〨〄〷』〵《〶〼〘〗】々〖　〳〶「《〝〰々〢〙〈〣〶〟〓〱〬〇〷〦〿』【〕〪〶〺〽〄〡〷〽〲》〟〃」〵〤〞〤〠〜〵〽》〉〡〦〖】〉〓〥〤〞　〺《〖〗　、」〯〳「〾【〩〮。〝〮〙】〦〴『」〘〕〉〚〯〳〇。〾。〇〔』』〚》〃、〠【〝〮」〟《〆〮〇」〥。〟〦〿〠〟〰〺〳々〯】〨〸〼〳〭〶〷〮〨〳〘〤〦。〠『〸〖
 「〰〝〡〻〻、〇」〇〚』〧」》〮〲〫】〱〼〻〲〷〓〉〵〩〢〣〻〚〞〧〰〽〕〭〧々〠〹〃〟〄〰〚〽〣〚〥〺〛〟〄〮〟〴『〾〒《〺〡　〒〜〈〶〔〫〲〃〟」〿〘〥〥〥〓『〝。〧〾〓〶〺〆〷〩〣〫〜〿〿〰「〕〒〓〯〣〘〗【【〪〾〛〕〽〫〹【〿〧〛〵〲〛〒〇〉〧〺」〺〺〡『〳、〪〾〒〈〮〜〞〙〱【《〣〬〈」〣〵〹〥〵〞〻〆〭〵〟〒〲〧〓〖〣〓々〰〞〹〇〮】〪〫〶「〦〽〓〻〓】〽〭「〣〔〹〯〨〖〩〵〦〳〯〯〧。〗』〾『〩〗〴」〼〗〨〵〥〴、。〒〣〧【《〓〜〓〠〢〓】〷〺〼〕〡〆、〦〿〥〾〚】〕〦〖〙　〭〬〙〇〳〄〃〄〻〧〔〚〰〲〟〷『〫　】〲〲〸〳《〢〵〰〟〪〉〜〨〇〶〻〻〩〄』〒〴〨〈』〗〿〚『〝　〹々〳〼〲〗〙「〵〲〢〔〫〵〜　〘〶【〬『〱〗、〧『〛〇〛〒〈　、〦�
 ��〙〇〖〤〩〜〉」〉〿〬〧【〶〦〃〘〈〖〄〶〦〚〜】〛〽〡〸〰々〈「〾〼〒〥〞〸」〮〸〒〗〙々『〇〄〈〃〜〺〯〉〉〾〹〺〚〞〽〦〄〢〽〄〞〻　〼〄〘〙】〚〼〫〴〚〫〬〖〭〔。〰〹〶〺〕〨〇〛　」。〇〿〲「。〆〗、《〫〬〨〻〝】〓〥〾〴】〹〈〞〺〜〰〜〬〴〱〜〖〾〣〭〥　〯〩〶〈》〸〝〼》〶〆〆〽〼「〗〓『〕〃】〡〠〹〺〈【〸〝〤〮〸〭〩〼〈〃〃〉】〳〿〃〬《　〩〈〒〢〠〆》〇〭〬〓〖〝】〧〶〞〈〶〘】」〽〝《〡　〈〟〶〯〹〦〨〷〩〧〞《〵〬〰々〞〧〓〥》」》〤〥〧〧〓〛。〦〄〫】〪〔〟〟〷〧〷〟〺〪〩〷〡〘〞「〔〽〯〔〬〈、〴〨》〥〒々〼〒";
+            Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, JapaneseTokenizerMode.SEARCH,
+                                                    JapaneseAnalyzer.GetDefaultStopSet(),
+                                                    JapaneseAnalyzer.GetDefaultStopTags());
+            CheckAnalysisConsistency(random, a, random.nextBoolean(), s);
+        }
+
+        // LUCENE-3897: this string (found by
+        // TestRandomHugeStrings) tripped an assertion
+        [Test]
+        public void TestYetAnotherCuriousString()
+        {
+            Random random = Random();
+            String s = "〦〧〷《〓〄〽〣》〉々〾〈〢』『〛【〽〕〗〝〓〭〷〷〉〨〸〇〾〨〺〗〇〉〲〪〔〃〫〾〫〻〞〪〵〣【〩〱〭〨〸〃々〹〫〻〥〖〘〲〺〓》〻〷〽〺〯〫』〩〒　〇〔】〳　〵〮〇〡「》〭〆〒〜〱〒〮〺〙〼」〤〤〒〓〶〫〟〳〃〺〫〺〺〤〩〲〬　〱〜〝〤〘〻〚〻〹〒〃」〉〔「〺〆々〗〲〔〞〲〴〡〃〿〫」〪〤」「〿〚』〕〆』〭『〥〕〷〰〝〨〺〧【『〘〧〪』〫〝〧〪〨〺〣〗〺〮〽　〪〢】「〼〮〨〝〹〝〹〩〳〞〮【」〰、〳〤〩〄〶〞〠〗〗〙〽々　〟〴〭、《〃〝〈〒〸〷〓〉〉〳」〘」》〮〠〃〓〻〶〟〛〞〮　〇〨〭〹』〨〵〪〡〔〃〤〔〇〲〨〳〖〧〸　〴】〯〬」〛〨〖〟》〺〨〫〲〄〕」〵〦〢〴〰〨〺〃〓【》、〨〯〥〪〪〭〺〉〟〙〚〰〦〉〥々〇】〼〗〩》。〩〓〤〄〛〇�
 ��〞〣〦〿々》〩『〕〡　〧〕〫〨〹。〺〿《〪〭〫〴〟〥〘〞〜〩。〮〄《〹〧〖〿》〰〵〉〯。〨〢〨〗〪〫〸〦〴〒〧〮」〱〕〞〓〲〭〈〩『〹〣〞〵〳〵》〭〷「〇〓〫〲〪『『》〧〇〚〴〤〗〯〰〜〉〒〚〔〠〽、〾〻〷〶》〆〮〉』〦〈〣〄、〟〇〜〱〮〚〕》〕〟〸〜〃〪〲〵〮〫〿〙〣〈　〳〾〟〠〳〙。〮〰〴〈』「〿《〄〛〩〪》「〓〇〶〩〇、〉〦〥〢》〴〷》〦』〉〟〲〚〹〴〲》〣〵〧〡〾〦〡〣「〆々　〔〄〓〡〬〹〣〰。〵〭〛〲〧〜〽〛〺』〛〵〒〽〻〆〚〚〟〵〲〺〠〼〻〄。〯〉〃』〕〫〥〦〕〔〢々〷々〥〥〖』〶〿〘〗」〖『〢〯〫〇〣〒〖〬〜〝〩〉〾〮〈〩、〘〰〦〧〓〬〸〓〺〼〟〰々〩〩〹〣」〓〸〄『〆〰〹》〵〉】】〼』』〸〣〦〾〰〗〴〥〴〤〃〿〡〳」〢〩〡〮〻〘〤〝〗〃〪�
 �〈〴〪〯「〭〓々〃〯〄〼〚〧々〢〃〈〔。】〆〣〰〜〪〮〣〿〕〮〾〱〇〈〟〭】〔〥〡〝〙〛〔「　〼〶〸々〹〯『〞〒〇〟〃〳〓〩〝〿《〵】〙〛〪　〭〼〈。〷》〨〰〵」〤〄〾〄「〈『〥〽〕〙【〤》〳〝〔〠〤〲〘〱〈『〴〫〚「」〛〸〹】〱〒〆」。〯〃】〼〮〒〄》〾〷〥〟〞〲〜〲〟〫〕〆〇〸〸〹〾〰【》〨〤〭「〇】〳〯〤、〙〳〺『〲〽〬〥〠。〹〃」〹〪〭〒　〇〶〧〟〻【」】〙〤〡〱〖》〇々〽〬〥〨〠〘〺〳【〫〄〜〹〄〚〯〈〸〻〓〥〤〻〮〃〗々〪〺〿〬〙〈『〭〩〟〽〬〝〄〦〇〥【〨〫〦〗〯〞〜〈〒〽〖〧〼〈〭〓〶〃〰〙「〧〉〹〢〕〼〒〸〼〣〡〔〩〯〼〚〲〖〪〯〒〮】〥〙〯〆〡〲〾〭〫〕〘、〖〮】〟〺〝〨〤〯〓〛》〳〢「〒〥『〿〔〸。〫〬〡〓〝「々」。〘〣〲〴〆〲】〽〮
 〮〲〓〞。〲〘〉【〲〭〰〨〩〱「〆〩。〦〉〇〄〺〱」〮〄〯。《〭〹〳〸〜〮〧〷〜〹〥〾〨〬〦〮』〖】〖〥〞〕〧〹〽、〺〜〯〒《々〠〠〴〝〤〇〷『〳〞〠〤〣。】〝。〛〉《〩、〦〻々〄〙〞〽〒〧】〉〺〦〔〄〯〙　〫〴〈〽〴《〰〱〗〢〓〔〗〖〖〪〷〠。〨〠〙〴〷〿〻〴〪〠　》〉「〛」〟〗「「〚〤「〫〨〣〉〶〥〢〈〯〄〈】〃〵〪〼〸「〾〥〒〲〮】〙》〡〯〓〵〡《〬〾〛】〄〡〦〪、〆、〵〒〹〰〴〜〬〶〭〕〟〠〰〜〶〵〨〾《　〻〵〔〘〟〾「〡〃〼。〤〺〭〨。〪》〄〇〄〔〖〺〪』〆〸『〰〭〆〗〪〪　〇〜〡〨〞〧〇〛〥」〼〇〼『〸『〵〼〇　〽〹〨〪〗〳〽』〵〽〸〷〄〿　〩〢〺〳〗〞〹〒〼〕。〇〷〔〯〜〘〾」」。〥〯〤〖〛〙〹〘〯〡〱〮》〰〾〚〚〣〆〰〹〾〝〉〲〠〗】〤〿〶�
 ��〾〇〽〤〰〆〭〝】〤〰〼〪〬〰〸〓、〃〵〄〉〤〲〱〨〵〴〮〹〬〧〜〭〶〒〯〺〬〒〭〲〡〔〚〹〇〫【〯〥〪〻々々〨〧〳〛〯〿　〈〽〥〘〖〣〿〫〲〶〚　〓〙〫〴〆〙〶〽〉、〔〪〫】〤〟〓〃〝、〧〡〸〸。〸【〹　〧〡】〡「〗〴〴〳〶『〱〖「〺〠〼〾〱〃〖〤「〧〭〟〇〧〙〕〩〭〻〤〩〪〳〪〟々。〷〥〗。〳〸〆〢「〆〿〻〚〳〚〸〟〘〡〘〇〶〖〡〇〾〥〖〝〝〹』〦。〖「》〥〞〳〛〕〖〥〻〙〾〔〬〈〇〓」〭〹〷〪〖《〫〾〒〙〺〻〨〼〇〝〾〣〴〚〩〴〕〢〦〩』〭〧〵〾〟〣〬〥〟〣〜」、《〲〧〪〸〸【〙〹、〤〽〰。〦〩〮〹】〸〆〹〗〓〶〇〤〳】〾〨〞〩〱〡〇〱〮。〶》〝〱〗〃〘〣〬〲〽〈〒〻〃〥〪〭〤〗〰『〵〹〙〇〵【〕『〤〄〕〥〵〸〮。〳〮、〤〣〱〧』〯〜』〉】『〷　〰�
 �〓〙〃〟〆〼〞「〫〄」』　〨〹〸《〷〔〫《〝〞〆〬〩〟」】〾〷〄》　〵　〫〵》〻〨〰　〟〈〰〽「」〸〣〪〮〛〞〜〦〱〚』〕〱〪〲〩〥「〚〓〺〣〶〨』〕〇〮〹〟〞〕〶〡〭〠〕〦〦〢〽〤〈〈〻〣〧〱〿〵】〖〞〖【〢〩〼【　〻〘〃〤〫。〠、〗〢〷」】〼〘〖。〤〘〄〢〴、〘〆〯〱〜〃「〦『〯〰〘〫〹〶〷〿、】々〙〛〜「〹々〮〿「〸〉』〯〱〄〓〥〣〩〥』〖〤〛【〭〿〺「「〳〛〧〉『〈〆〒〠》〳〈〳〩〃〮〚〼」〲〮〩〮〮〢〸〿》〈〉〗〾〇〕〩〸〖〾〠》〃〞〄〣〭〡〕〣〚〆〤〄。〸〞。《〼〄〤〸」〿》〤「〵〥【〔〕々〙〸〛〛『〶〾。〷〫〼〽〤〨〓〭〻〈〶〿〾〨』〤【〾」〇〤〒〠〺〜〸〼〪〢〷〔》〣〤〬〣〱〝〇〺〢〠〤〹〡「〪〲〿〬〘〡〯、】〖、〈〶〛〢〕々〽〼〼〚〿〘】
 〢〰〡〿〗《〉〙《《『〶【、、】〡〓〦〞〵〤〧』〝〕　〄〃〸〈〤〪〻〭〉〘〷〉〕〨〻〢〢〡〸〔〮〧〹「〦〘〉〾〉　〺〽〷「〺〖〺〝》〃　〇〪〜〶〺〣〇〭　〾」〣〼〞〷々〽〤〶々》〻〈〽〒〕『〬」〈〟〕〷〼〲〄〚〜〴。〮》々〧〻〔〕〈「〾『。〴〷〯〢〿〦〈〸〩〻〃〻〚〞〤〈。〧〇〾〺〢〓〵〸〛〔〡〷【〜〺　〕〶〦〣〻〟」》】〺〚〷〺〹〙〳〺〬〓〢』〘〕「〸、〙〾》〖》々〬〄〇『。〵【〩『〺〆〮〮〙〵〫《〃〽、〓〠〨〚〕〈『〦【〗　〄〴〫〡〮〱〔〆〗〟〵】〻々》〲【〬〢〚〛』〱〰〫　〇〤〴〮〾。。〮】〇〲〻〙〰〥〚」〟〜〄〟。〤々〞》〧〉〳【〿〺〆〈〖》『〤〄』〾〵〲〸。〈『〕〺〘〣〶〬『〪〆〳〽《。〒「〽〨〸〜〚〘〪〤々〦〆〺『〣〆〽〇〿「〥〵〒〲〟〜〳〭〼�
 ��〡〮〆「〆〥〺》〱『〺〔〃〙〻〥々《々〙〼〪〼〵〙』〥「　〵〯〓〩。〰〕《〟〦〝、〦〦〤〗〴〩〹〶〠〰〡〇〤〹〓」〣〆〜〴〘〔〃「〤〈〩　〠【〃〙〢々〉〝〬〙〭【〮〗〙〤〿〖〓〫〻』〞〤〼〳〹〄〵〾〔〛〮。〒〉〤〣〭〰〨》〭〲〗〃〇〆〡〜〱〲〮〫〄〬〄〉〯〈〮〩【　〮〦《〪〲〣〡〶〬『〲〵〇〶〰〒〭〽　〰〄〻〄『〬〩〠』〕〫〤　〼〶〳〮、〓〸〲〓〜〳〺〈〫〺〒　〨〡〡【〷〆〇』〝〩〨〗〕〪】〪　〛〛〺〙〷〦〠」〱〞〼〸」、〢　〺」』〲〆〃〟〱〟〝「《〸〳〒〖〨】〥〖〈〧〼。〫『〙〧〡『】〔々、〼〝　〕〙〇〘〲〔〝〺〘〄〓〒〼〈〛『〺々〩〱。』〬【〱「〳〜〼〬〴』《〗〔〡〰〪〤〥〲《』〥〉〪【〶〤【〻〡〒〯〜【〽〪〉〠〾〙〰〚〵〦〦〴々》〙〠」》〠〱〓【〶�
 �々〻【〽〶〼〺〷。〶【〘》〻〗〳〣、」】〳〓〞〆〆〾』「〈〙〕〱〢〳〨〰〡〸。〣〪〤「〱「『〙〽〇々【〜〖〮〚〟」。〜〰〉〔。〣〽〇〖〬〆〥〖〧〨〱〡〸〪〣々』〄「『〞〶　〴〰】〃〱〱「〶〝】〞〭〚〴〶〻〟〧〡〳〬〧、〣】〕〼「〠〃〷〣〩　〭〄〩〝〦』〟〇〦〟〕。〩』「〵〩》〿〻『〙〼〲〰》〨〉〆〓、〺〹〸。〞〧〗〘〳〓〞〹〕〡〼〔〖〴〄】〚〻〯〴〣〮〦〧〣〵〼〚〾〫〼〣〔〚〽『〵〒【【〝〹〮》『〨〜〠〸〠〵〨〙【〧〸〈》〱〗【〓〤】〰】】、〩〽〈〸〔「〵〻〙〓〰〇〚〞〗〙〢々〭〜〈　。〧〿〧〨〵〾〝〬【』〫〦〸〬〈、〒〢〉〞〵〒〼〝》〻〫〧〤〶〹〼〩〛〫〣】〿」〴》〺〬〤〕〲〕〙〔〪〰〿〬〒〔〞〆〻〴〘〩〨〤》〩〪〭〳〇〣〚〟〚〕〓〴〱　〵〃〠〭〠〚〗〃
 〃〸〰〢〡〿〭『〗〉、〲〕〧「〛〛〓〜〰〮』〱〨〬〨〽〸〽〶〣〯〫、〯々、〴　」〕〥』〻】〖〴」〨。〖〤『〜〰〩〣〣〸、〫〝、〯〹〷〳〚〄〷【〃」〼「〤】　〢〖　〣〙〺〽〽〱〤〔〓々〣〭〽〘〦〻〪〿〞〝〱、《〆」〸〷〛〓〕〹〜〪〹〶、〵〦〛〲〒〹〪〦〃〥「〸〪〙〧〱〠〰〝〆〠〯《〼　〛〚〔〟〽〗」、〲〥〞〴〃、「『〖〼〞〪〼〇』〿〶々〙〻》〥、〵〛〞〠〫〟　　〹〾〵』〤〿〣〪〗〃〖〬〩〴〗々〓〝〥〥〜〲〯〗〤》〛〮》々〚〘〫「〙〉【〆〽〨〹〮〧〷「〴〝〬〷〗『〔〷〮〟〲〬〸〸〟〹〆〖〨。〣〄』〴〚】〘〲〚〚〦〈〛〗〞〉〞〯〆〵〸　〗〕、》【〸〮〵〉〥〨〕〟〭【〾〇〵〬〾」〱〹〚〟〛〡」〩〃〄〬〱〭〚〱〆〛》〣』〝〡〦〣〫〒〗〛〿〤〇〼〠〲〢〬〿〓〠」〚〇〛�
 ��〴《〦〱〤〹〝〱〶〟〙〴〶〣〝〮　〜〲〱〿〳〪〄〝〃〰〙〖〼〰、〬〰』「〭〻〮　〩』〱【〆〻〺〸〾〤〗〸〥〽〼】〤〣〖「《〡〙。〸、」』〠》〴〈、〴〢〣〲〟〳〸〒〠〣〵〢〿》』〿『〾〔〢〶〦〟〠《〹「〷〽〷〆〇〉〲〿〵〙』〫〠々々〘』《〽〒〦〽〓〳、〮〻〫〞〲〰】【〗」々〥』【〫〆〫〳〾〣〖〺〷〙〘〄〈〼、〧〻〭〮〳』〘〾〇〸〉〽〗『〙〽〻〟〇〘〽〖〴〄〓〞「〦〪〚〾〨。〕〻〰〟〉〢「〉〿〯〔〹〃〛〛〝〔《〵「「〴「〗〸〖〞〦【々〣〲〤〾〿〽〲〥〢〥』〳〳〼『】〆〼》〩》」。〛〲〡〳「〢〥》〘〠〃〳〃〒　〧〓〡〤〄〲」〦〶〷〟〛〠〱〽〫〫〸〇〔、〪〛〠　」〢〳〸『〸〚〹〈〘〉〫〇〲〲〈〕〙〱】〯々【〬〖〿〒】〔〭〣〚〄〈』〧〗〹】〇〬〸〾　〭〺』〯〫〻『�
 �〻〱　〴〆〘「〠〈〫〡》〤〕】〜〙〵〒〙。〦〮〞〪』〴〓〪〾〝〹〴〼《〦〞〖〆《〥〸〻〈〽〪〤【〖〶〞〤〃〰〨〱』〨〼〱〠〣」〝〹〝〕〼〔〃、〮】〤【〼〤〼〥〪〲〓〦〘〟〞〭〜〸】〚〸〵〞〙〧〈〽〹〄『『〙〓〸〯。〜〺。「〖　〶々〉〈〮《〢〭〶】〘〜〺〸〒〥〢〾〈』〱〃〤〳〖〉〼〫〛〚〽〫〳〰〫〥〜〜〺〷〲《〢『〛〭〈〧〳〣〜〝〧、〥〾〻〳〺〕〥〥〼》」〺〮〒〣〥〲〟〠〫」〾〱〼「〄〆「〓〽〹〵〈〙〛〵〰〩〟〫〈〔々〒〟々〉、〷〚〶〆〘〛。　【「〸〸〖〫〕〰〱〺〟〫〿〹〩〇。〾〒〚〲〾〛〳〨〦〙〒》。〺〧〡〞〒〚〩〪〶〘〣〨〶〩〛〺〙〪〄〼〮〰〒〡〼〓〙〒〇〽『〃　』〇》〽〃《〒〠「〚〨〗〶〴〪〮〵〘〨々〓〗〚〠』〗〮〳〺〲〙〒「〴〼〻〤〉〯〨〧〈】〾〟〝
 〒〃〘〧『〶〿、〤〝】〜〴〰〷〽〮〱、〩〽〺〯〫〜【〴〈〳〖〬】〦〘〗〜〝〄〚〚〤〨〲』【〞「〰〔》〷〥〈〡〳〢〾〮《〭〫〡〴〹〻〚「〰〻〉〣〢〤〤〝〩〧〙《〓】〺〺〓〿〹〈〚〱〬〘《〽〈〕》〣〓〒〴〆〜〭〖〛〝〷〧〴〮》〳〘〸〴〿〥〙〒〔「》〓〕〦〯〾〯〝、〮、〯〆〛』〞〝〵〥〬〚〡〰〔〵】。〽〥〿』〩〇〝〄〴〪〭〸〫〡〣〧〆〚〫〴〙〦〽〉〸〼。〱〨〛〠。〮』〝》〻〹〈〄《〻〱〥〞〽〾〄〝〢〿。〴〆〲『〰〢〖〲〼〯〃〠【〲〵〛〣〝〕〬〺〰〪〻『〨】〖〥〵〹〯　〒「〠〮〈〃〹〽〬』〹〷〫〕〧〟〒〉〉、〈「〟》〼〪〰〗〘『〞〉〹〚〤〩〦〗〖〮〰〇〠〫」〔》〮、〆〡〛〻〙「〵。〯〹〘「〵〫〼。】〃〢〺〴〛〪〬　〞〟〓」〭】、〸〘〻〈〤》〓〩〽〆〵〨〈「〦〠々〨�
 ��〢〛〝〿〗〥〱〕〩〖〣〄〚〿〆〗〢〉々《〚〩〶》〥　【『〪〯〾〸〪〲〞〠　〡〓〻〷〢〕』「〹〯〛〫〲〗〗〚」〵【〪〢〥〫〆》〦〥〱〯【【〉〧〺〻〉〬〳〒〳〾〲〲〇〇。〪〙〧〿〆【」〇〪〸〽〦〚〽〿　〠〺〥〦々〬〄〟〪〭、】〴〾〸〛。　『、《〫〺〯〛〩》〓〴〪》」々〧【〦〇〮〬〲〗〔〦〴〣〼〨〖〩〬〼々〛〇」〴〦〉〤〺〪《　〒〧々〤〧〣〘【〵〛〢〵《〛〘〵〓〶〳〤〺〨〣〭〤〪〮〺〷《〗〵〞〻〠〭〃】〄〒〯々〶〉〞々〽〤〇〦『〦〽〩〬〠』〷〄〩〙〖〝『〘『々〔【〿〰〶〪〱〉〘》〃〙〧〦〇「》《〹〰〯】〹〄〈〪〜〵、〮〣〇〯〲〛〬〕】々〸〹〩〟〳〆〥〯〬〠〭〯『〙〆〾『「〈〬〹〕〾、〸」〷〥〆〺〾〖。〆〒〮〻〡。〉々〕』『〨〼　〢〓『〢」々『　〘、〖〤〜【々〤�
 �〵〳〤〽」〟〥〴』、〒〥〆〙〬〧〔〡〄》〷。〣〉〪〙〚〾〣〵〰〮〔〇〝〫〫〩。〪〷〩。〇〿】〲〦〳〕《〄〴〦〽〔、〱〧〟。〻〺〔〝【〲〔〦〙〖》〠〫】〵〙〰。〖〸〼〣〗〲々〤〢〷〝〰】〳〳〯〟〓〬〺〤〿〲〩〞〡〧〲〧〭〽〪〰〥〧〴〈〈〢〕〯〔〨々〭〸〡〖〓〤〒〝〻〻』〣々〸【〸〸〷〓〇〦〻〤』〉〾〛「〢〢《】〜々〛〇〠〒〹〖〽〮〚〫〜〼〄〓　〹〽《〽》〮【〺〦〠〨〰〸〘〲』」〹〳〤〽〴〴〰〳〷〟】〼〽〓〇、〡〚〶　〥〄〉〴〵、〷〳〥〬〳〓〩〯〜〪〯〬々〢〾〆〨〥」』〪〄〨〽〗〭〯〼〒〡「々〩』　〉〔〓《〉〺〫〖〽〱〳〡〪〯』〼〉〝〟〹〯〇〠〥〨〖「〢「〥〲〘『〹〥〶〜〥『〃。〲〗〢〩〮〕〨〸』〪〯〲】〠〻〟〶〣〸〵〩〔〾〞〳〾〇〵〥〟〭〳〡〆〾〤〶】〈〓〄
 〮〢〒〩《〔〭〄》『〰〧〡〖〵〥〵〒〭〳〵〝〜〱々〞〰〴〦〱〿〾〴〪〥〧〚〚〒〚〘〿〛〾〫〚〕〷〔〗〢〻〠』〘〾〖〿〦〥〮〆〼〞〴〹〸〻〵〞〄々〷〔《】〛〒〻〓〴〮〛〺》〫〬々〦〦〬」〯〞〼〚〘〰〿〝〾〘〠〵〴〃〞、〹〢〗〹〰〤「〔』〇〒〭〫　〞〉〿〜〳〫〩〿〧〵〟〾〤々〩〝「《〬〃〇〬】〔〇〆〷〭〬〵〾〚〺〬〧〻『」〈」〻〹〞、】。〉〯〫〺〒〙」〱〛〻「』〱〺〠〄【〿〦〰〸『〬〴〓〨〢《〣〓〜〒〡『〼〔『〵〕〝〗〳《〲〳〼〝「〽〬〱〺〠〱〽〘〗〹〨〆〕〠々〓〤】〺〉〴〰〮」〰〿〹〳『〠〔〇〧〭〼〪〭〯〖〶〬〃〱〔〙》〺〜〵々】〡〧〲』〕〛〳〥〩〱〮《〦〫】〖〈》〞〻〤〢〦〪〬〲〗〢〷　　〳〰〓〕〜〥」〬〗〒〜〉〩〆〬々〿〪『〣〘〡〘〯〳【〄〠〸〼〈〰『。�
 ��〲〭〡〷〥〯〴「。〤〓〪〆〦〆〒〽〫〰〚〡〨【〯〹「〧」〓〖〘〳」〕〲〚〣〕〆〃〱〞〷〺〻〃』〩〫〦〱〴〟、〰〘〞《」〛〤〿〔　〤〱》〗〷〡〡〗〞〦〿。〤〳】。〟〻〉「〻〙〖〿〄〶」〾〫〽〸〕〢〰〞〞〒〜〻〠〭〫　〞〴〰〶〺《〣々〩〲〡〴》、〩〝〞【〼〓〱〻〩〒〖〿〮〱〧〟〒〶、〿〈「〻〴』』〇〉〝〛〢〜〼〘〰〇〢〃〲〟〨〟〣〟〰〉〮〘〽〧。〓〳〩〺〳〓〘〗〖〈〜〴〟〽〣〣〾〽〩〲〜〇〰〩〕〧〚〄〴〴〴〨〠〦】、〣〺〖》〯〷』〒〤』〙〗〬。〧〆〜　〧〩〯〞〜〬〡〆、〞〔。〾〩〈〛〼」〾〮〤〾〟』〉〔〞〾〛〲《〈〫〝〽〳〞〔【〿〽〩。〈〨「〯《々〇、〯〜〾〝〯〼〆〟〉〝〮〙〪〚〮〱〹〯〜〟〠、〄〹〧〳〱〯〖〯】〩〴【〫〇「「〿〩〷〾〴〯〦〼〦〟〖〤〪〥〰〔�
 �〪〄〖〳〵〟〕〰〬〶〚『〘〻〇〽〪「〉】〮「〣〿〇〭〕〓〵〽〆〳　〨〩〕〬〵〸〻〲【『〥〖〚〢〰』〠。、〮〣〆〴『《〲〓〷《〱〰々〫〶〢〯〗〚〙〶〫〖〃〻》〰『〱〘〫〛〄〉「〠〱〚〖〕》〤《　〵〶〢〯〗〳〛〚〽〗〟〛〪〾〶〞〶々〆〯〇〝〕〨〨〣〫〄〵〞〛〬〣《〦〦〒〉〙〫》〞〨〜『〝〻〒〟〓〜》〡〡〫〻』〆〒　〔。〓》《〨〙〿〙〔〘〮〦「〚〻、「〵〠〉〬。〭】〱〸「〶〈〞〈〪〟〻〝〲〮〆〼〯『〱〡〙〮〕〒〣」〳〥〙〡〡『〇〠〡〭〷〜々』〣」〼々、〗〡『〽〻〽〳〉〄〵〬〽〯〥〾〙〉〿〮〴〷〥〡〰〹〰《〺【〒」〙〾〽。〴〘〕〝　〳」〡〇〩〥〾〆〨〉〫〠〙〤〒【〸々〣〓〰」〈〪〵〠〚】〈〆〵〗〜〦〣〃〼〔〉》〆〞〚〆〄〫〺〽〪々〩〴〵〹〿〔〥〜〩〪〤〗。「〽〨〟
 、〄〽】〩〙〝〺〶〸〟〯《《〥〣〻『〟〽〮〄》〙〕『」〾〼〷』々〥〒【、〗〔〯】〮　〹〩】〡〇〟〫〢〨〡〭」〄〼〙〪〻〪々〙。〫〧〪〞〾〄』〟〶〇〞〜〥〘。〝〨〸】〕〔〨〕〾〃〾〒」〈〒〓〼〗〖〕〱〙〘〓〝〾〔【〵〿〖〸〷〵〩【〞々〼〢〧〻〥〰〦〤》〰〛〡。〖〝〙〒〽〜〕〘「。〵〇〒〾〼〽〈〣〇〒〙〢〸、〞〲》〪〰〴　〽〭〷〸〫〆〞〾〨〆〛〔〤〜」〈〨〃〈〴〽〲」。【〞〒〉。〱〕〨〽。　〷』〦》〵〩〪〡〕〞〹〃〧〃〝〢〴。〃〛〭〻〣〸〖〞〻【〛》〜〳〜〟〘〄」〸〬〶〥》〨〭〡〦〇〇《〱】〸〼〺〬〛〓〔」〰〈〧、【〕」〳〼〗〯〉〒〖」〧〩》〴」〺。〰〷』〩〚〭〞〰〶〚〲〙〥〢。〽〵〱」】〓〘〦。〭《〥〙、〱〹〦】〕》〲、〘〓〙〷、〪〕〉〭、〇〜々〖〨〞」〠〕�
 ��〨〕〔〻〿〙〘〙』〼〘〡〢〧〚〢〷〸〰〟〰〗」〪〛【〪〺〒〱〈〦〽、『〥　〙〪〕〝〄〛〣〴〯〆〒〰〜〪〆〠〞〾〃〭〬〡〉】〄〃〥〥〒〶〕〢〵〣〢〨〘〩〹〖〧〒〺〫〕〡〆〭〘〿〠〹〲〔〫》〪〰〇「〯〫〈〾〱〄、〮『》〹〿〿〱〦】〳〰」。【〘〆〞〚〱》〫〷〸〠〲〚〶〷〘〩〯〛〄々　』〪〭〬〖〪〦々〼》〇〤。〉〯〟〮〢〤〬〜〪〬〺〿〹〖〔】〕〖〣　『〵〸》〧〻〺〜〧〯〄";
+            Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, JapaneseTokenizerMode.SEARCH,
+                                                    JapaneseAnalyzer.GetDefaultStopSet(),
+                                                    JapaneseAnalyzer.GetDefaultStopTags());
+            CheckAnalysisConsistency(random, a, random.nextBoolean(), s);
+        }
+
+        [Test]
+        public void Test4thCuriousString()
+        {
+            String s = "\u10b47\u10b58\u0020\u0078\u0077\u0020\u0020\u006c\u0065\u006c\u0066\u0071\u0077\u0071\u0062\u0020\u0079\u0078\u0069\u0020\u101da\u101d5\u101e6\u0020\u0074\u0020\u2c55\u2c18\u2c2d\u2c08\u2c30\u2c3d\u2c4f\u2c1c\u2c1b\u2c1c\u2c41\u0020\u003c\u002f\u0073\u0020\ue22a\u05d9\u05f8\u0168\u723b\ue326\ubf5f0\u0020\u0063\u006a\u0072\u0070\u0061\u006b\u0061\u0071\u0020\u0028\u005b\u003f\u0020\u003f\u003e\u0036\u0030\u0020\u0020\u0065\u0068\u006a\u006b\u0075\u0074\u0020\u0068\u0067\u0020\u0071\u0070\u0068\u007a\u0061\u006a\u0062\u0065\u0074\u0069\u0061\u0020\u006d\u0079\u0079\u0065\u0067\u0063\u0020\u3066\u3082\u308e\u3046\u3059\u0020\u2125\u2120\u212d\u0020\uffbe\uff5c\u0020\u0067\u004c\u0025\u0020\u0020\u2df6\u0020\u006b\u0020\u0066\u006a\u0070\u0061\u006e\u0064\u0020\u0067\u0072\u0073\u0020\u0070\u0064\u0063\u0020\u0625\u0278\u6722d\u2240\ufd27\u006a\u0020\u4df1\u4dee\u0020\u0072\u0065\u0063\u0076\u007a\u006f\u006f\u0020\ue467\u9d3a0\uf0973\u0218\u0638\u0020\u0019\u00
 50\u4216c\u03e6\u0330\u894c2\u0020\u0072\u006d\u0065\u0020\u006e\u0061\u0020\u0020\u006d\u0075\u0020\u0020\u0063\u006f\u0074\u007a\u0020\u0069\u006a\u0076\u0078\u0062\u0061\u0076\u0020\u1c26\u1c2c\u1c33\u0020\u0067\u0020\u0072\u0068\u0073\u006a\u006e\u0072\u0020\u0064\u003f\u0064\u0020\u0020\u0073\u0073\u0073\u0072\u0020\u0061\u0020\u0076\u0077\u0062\u0020\u007a\u0020\u0077\u0068\u006f\u0062\u0062\u006e\u006f\u0070\u0064\u0020\u0020\u0066\u0073\u0076\u0076\u0070\u0066\u006c\u006c\u0066\u0067\u0020\u006c\u007a\u0065\u0078\u006e\u0020\u006d\u0066\u0020\u005b\u0029\u005b\u0020\u0062\u0076\u0020\u1a12\u1a03\u1a0f\u0020\u0061\u0065\u0067\u006e\u0020\u0056\u2ab09\ufd8b\uf2dc\u0020\u006f\u0020\u003a\u0020\u0020\u0060\u9375\u0020\u0075\u0062\u0020\u006d\u006a\u0078\u0071\u0071\u0020\u0072\u0062\u0062\u0073\u0077\u0078\u0020\u0079\u0020\u0077\u006b\u0065\u006c\u006a\u0020\u470a9\u006d\u8021\ue122\u0020\u0071\u006c\u0020\u0026\u0023\u0036\u0039\u0039\u0020\u0020\u26883\u005d\u006d\ud5a0e\u516
 7\ue766\u5649\u0020\u1e0c\u1e34\u0020\u0020\u19ae\u19af\u19c3\u19aa\u19da\u0020\uaa68\uaa78\u0020\u0062\u006b\u0064\u006f\u0063\u0067\u0073\u0079\u006f\u0020\u0020\u2563\u2536\u2537\u2579\u253f\u2550\u254c\u251d\u2519\u2538\u0020\u0070\u0073\u0068\u0020\u002a\u0061\u002d\u0028\u005b\u0061\u003f\u0020\u0020\u31f9\u31fc\u31f7\u0020\u0029\u003f\u002b\u005d\u002e\u002a\u0020\u10156\u0020\u0070\u0076\u0077\u0069\u0020\u006e\u006d\u0073\u0077\u0062\u0064\u0063\u0020\u003c\u0020\u0020\u006a\u007a\u0020\u0076\u0020\u0020\u0072\u0069\u0076\u0020\u0020\u03f2\u03d0\u03e3\u0388\u0020\u1124\u11c2\u11e8\u1172\u1175\u0020\uace9\u90ac\ua5af6\u03ac\u0074\u0020\u0065\u006a\u0070\u006d\u0077\u0073\u0020\ue018a\u0020\u0077\u0062\u0061\u0062\u007a\u0020\u2040\u204f\u0020\u0064\u0776\u6e2b\u0020\u006a\u007a\u006e\u0078\u006f\u0020\u030f\u0334\u0308\u0322\u0361\u0349\u032a\u0020\u006f\u006e\u0020\u0069\u007a\u0072\u0062\u0073\u0066\u0020\u0069\u0079\u0076\u007a\u0069\u0020\u006b\u0068\u0077\u0077\u0064\u0
 070\u0020\u3133\u3173\u3153\u318c\u0020\u007a\u006c\u006a\u0074\u0020\u0065\u0064\u006b\u0020\u002b\u002e\u003f\u005b\u002d\u0028\u0066\u0029\u0020\u0020\ua490\ua49e\u0020\u1d7cb\u1d59f\u1d714\u0020\u0070\u0075\u0061\u0020\u0068\u0020\u0063\u006e\u0020\u27b1\u271c\u2741\u2735\u2799\u275d\u276d\u271b\u2748\u0020\u55d4\uec30\u1057b4\u0382\u001b\u0047\u0020\uf1a9\u0a76\u002d\u0020\u005d\u005b\u0061\u005d\u002a\u002d\u002b\u0020\u2d05\u2d22\u2d03\u0020\u0073\u0064\u0068\u006b\u0020\u0067\u0079\u0020\u2239\u2271\u22fc\u2293\u22fd\u0020\u002c\u0062\u0031\u0016\uf665\uf0cc\u0020\u0064\u0068\u0074\u0072\u0020\u006b\u006c\u0071\u0061\u006d\u0020\u005b\u005b\u0020\u41dad\u721a\u0020\u39f2\u0020\u0020\u13f4\u13e4\u13a3\u13b8\u13a7\u13b3\u0020\u0049\u0004\u007b\u0020\u13420\u0020\u0020\u2543\u252f\u2566\u2568\u2555\u0020\u007a\u006e\u0067\u0075\u006f\u0077\u0064\u0077\u006f\u0020\u01d4\u0508\u028d\uf680\u6b84\u0029\u0786\u61f73\u0020\u0020\ud7ee\ud7fd\ud7c5\ud7f4\ud7e1\ud7d8\u0020\u8c6d\u182a\u
 004f\uf0fe\r\u8a64\u0020\u0064\u0077\u0068\u006f\u0072\u0061\u0020\u006b\u006a\u0020\u002b\u002e\u0028\u0063\u0029\u0020\u0071\u0018\u2a0a\ubfdee\u0020\u0020\u0020\u0020\u003b\u0020\u4dda\u0020\u2ac76\u0020\u0072\u0078\u0020\u0020\u0061\u0073\u0020\u0026\u0020\u0068\u0077\u0077\u0070\u0079\u006f\u0020\u25cde\u05b2\uf925\ub17e\u36ced\u002e\u0020\u2e285\ue886\ufd0c\u0025\u0079\ueecb\u0038\u0020\ud03c\u0039\n\uc6339\u0020\u0077\u0074\u0020\u0065\u0069\u0064\u0065\u0020\u0075\u006e\u007a\u006d\u0061\u0074\u0020\u0066\u0064\u007a\u0070\u0020\u13114\u1304d\u131c3\u0020\u006f\u0061\u0067\u0071\u0070\u0067\u0020\u0069\u0020\u1f007\u0020\u0070\u006f\u0020\u002e\u005d\u002a\u0020\u0062\u0075\u0077\u0020\u0020\u0021\u0038\u0020\u006f\u0072\u006f\u0078\u0020\u0070\u0020\u12a2\u0020\u25e1\u25e7\u25be\u25c9\u25c6\u25dd\u0020\u0062\u0062\u0065\u0069\u0020\ua6a7\ua6d4\ua6cd\u0020\u006e\u0063\u0076\u0069\u0020\u003f\u002b\u007c\u0065\u0020\u0075\u0062\u0076\u0065\u0073\u0071\u006d\u006f\u0073\u0020\
 u0071\u0020\u10282\u0020\u174f\u1742\u1758\u1750\u1757\u1752\u174d\u175f\u0020\u006f\u0020\u0020\u0068\u0077\u0020\u0020\u053a\u0036\u0286\u0037\u0014\u05f1\u0381\ub654\u0020\u006b\u006b\u007a\u0079\u0075\u0020\u0076\u0072\u006d\u006d\u006a\u0020\u0074\u0020\u0075\u0074\u0020\u0639\u0057\u0235\u0020\u006d\u0064\u0061\u006e\u0079\u0020\u003c\u2b7c6\u0020\u0063\u0061\u006d\u0068\u0020\u835f\u0572\u20b2\u0020\u0066\u0068\u006d\u0020\u0071\u0063\u0061\u0079\u0061\u0079\u0070\u0020\u0061\u0063\u006a\u0066\u0066\u0068\u0020\u0077\u0068\u0074\u0074\u006c\u0061\u0020\u0020\u0077\u0064\u0073\u0020\ue0068\u0020\u0019\u0048\u0034\u0020\u0064\u0068\u0077\u0062\u0020\u006e\u006c\u0079\u0061\u0062\u006f\u0074\u0020\u0074\u0065\u0077\u0020\u0063\u006f\u0065\u006a\u0020\u1b3f\u1b7a\u0020\u0020\u0020\u1f00b\u0020\u0020\u0061\u102c5c\ue1b9\u0020\u0071\u0069\u0067\u0066\u0020\u0016\u8e2f\u005f\u0067\ud6c2\u0020\u0073\u0071\u006f\u0020\u006e\u0078\u0066\u0063\u0066\u0064\u0069\u006e\u006e\u0020\u0024\u
 0078\u59d1\ueacd\u0020\u25367\u07ac\u5652\u0020\u2592\u2588\u0020\u007a\u0068\u006f\u006c\u0078\u006a\u0064\u0020\u0070\u0065\u006a\u0076\u006d\u0079\u0020\u0020\u0066\u0061\u0063\u006f\u0020\u006d\u0072\u006e\u0061\u0070\u0020\u0062\u0075\u0075\u0020\uf2e2\u07d9\u0020\u1cd1\u1cee\u1cf3\u1cdc\u1cf4\u1ce5\u0020\u006a\u0077\u006b\u007a\u0020\u0079\u006e\u0062\u006c\u0020\u003b\u003e\u003c\u0070\u003e\u003c\u0020\u007a\u006c\u006d\u0020\u0020\u0078\u0062\u0079\u006d\u006b\u0020\u0065\ue74e\u00d7\u5cb6\u0020\u006a\u0062\u0020\u006b\u0067\u0074\u006e\u0071\u0065\u0069\u0075\u006f\u0020\ued7a\uae84f\u0052\ucf09\u0292\u265e\u0456\u0020\u0063\u0064\u006a\u0062\u0075\u0077\u0020\u0020\u10ac\u10f1\u0020\u013a\ue711\u0075\u0000\u0020\u0020\u2b30\u2b25\u2bf3\u2b5d\u2b21\u2b86\u2b45\u0020\ua830\ua83c\ua830\u0020\ue864\uf7ce\uf5c8\uf646\uec28\uf30e\uf8ab\u0020\u31c9\u31e8\u31d6\u0020\u0020\u0074\u0075\u0065\u0070\u0020\u0067\u0078\u0062\u0068\u0071\u0069\u006a\u0020\u1dc2\u0020\u0070\u006b\u006d\
 u0020\u0020\u50ba6\ue1a9\uc0bb\u59a1\u0020\u2fa3\u2fac\u2f8c\u2f35\u2f5b\u2f7e\u2f62\u2fd8\u2fc7\u2f2b\u0020\u0065\u0064\u0078\u0072\u006e\u0062\u0020\u0073\u0069\u0063\u0073\u0067\u0068\u0061\u0069\u0020\u0020\u0062\u006a\u0066\u0020\u2fae\u2fa4\u2f24\u2f04\u0020\uec10\u4a64e\u0038\uf806\u006d\u4ea8\u0020\u006a\u006c\u0020\u0020\u4deb\u4dc8\u4dd8\u4dc2\u0020\u0020\u1d24d\u1d209\u1d23c\u0020\ue4288\ufdd9\ue4e2\ucd9a\u0014\u006d\u0020\u0020\u31c3\u31e2\u31ed\u31db\u31dc\u0020\u0074\u0079\u0067\u0072\u0020\u003e\u0026\u0023\u0020\u0065\u006c\u0068\u0072\u0065\u006c\u0020\u03fc\u192c\ua9838\u5261b\u0031\u0020\u6e84\u44c1\u0020\u1f016\u0020\uf635\u002f\u0042\u0760\u0020\u006a\u0020\u0064\u006e\u0076\u0020\u0079\u0061\u0079\u0020\u003c\u0020\u058b\uf7e0\ufd7b\u07b7\u0020\u0079\u006d\u0074\u006a\u0020\u006d\u0064\u0069\u0020\u0020\u0072\u007a\u007a\u006e\u0070\u0020\u0020\u0020\u0079\u0071\u0065\u0068\u0072\u0020\u2d1f\u2d09\u2d1e\u2d21\u0020\ua859\ua85d\ua84e\ua84d\ua84a\ua859\ua873\u002
 0\u0065\u0020\u006c\u0071\u0070\u0074\u0069\u0020\u006a\u0066\u0078\u006b\u0076\u0067\u0071\u0069\u0020\u0026\u0023\u0078\u003c\u002f\u0073\u0020\u002e\u0029\u0020\u10298\u1029c\u1029b\u10293\u0020\ub1c5\u0600\u5fe3\u0632\u05f6\u0020\u003f\u003e\u003c\u0073\u0063\u0072\u0069\u0070\u0020\u2ff8\u2ff5\u0020\u006d\u0068\u0079\u0020\u003c\u0021\u002d\u002d\u0020\ued87\u53f6\u0428\u001d\u616f\uf1bf\u0034\u0020\u0d66\u0d2c\u0d01\u0d57\u0d43\u0020\ufb01\ufb1e\ufb4f\ufb08\u0020\u0076\u0071\u0075\u0020\ufee4\u269a\ued60\ue346\u007d\u0020\u0020\u006e\u006d\u0061\u006c\u0069\u0020\uf928\ufa9c\ufa27\u0020\ufe2d\ufe22\ufe2c\ufe23\ufe2c\ufe2f\ufe23\u0020\u247c\u24cb\u24fe\u2486\u248d\u24e9\u24d8\u0020\ufe68\u0020\u0077\u007a\u006f\u006f\u006d\u0065\u0020\u0007\ufcbd\u3d085\u0020\u0073\u006e\u0073\u0069\u006f\u0020\u1049f\u0020\u0062\u0078\u0020\u0074\u0020\u006f\u0062\u0076\u006e\u0075\u007a\u006e\u0073\u006c\u006f\u0067\u0020\u0026\u0023\u0078\u0035\u0065\u0039\u0039\u0066\u0020\u006f\u0020\u0058
 \u83d7\uf4d7\u9b59\u0020\u256f1\u0c8e\u005a\u024d\u0055\u0020\u0063\u0078\u006a\u006e\u0063\u006a\u0066\u0020\u0069\u0075\u0020\u0068\u0063\u0079\u0078\u0071\u0076\u006a\u0061\u0020\u13e2\u13ee\u13ef\u13cb\u0020\u0064\u006b\u0079\u0020\u0072\u0073\u006a\u0020\u006a\u0020\u003f\u003f\u0020\u0077\u0020\u0960\u0937\u0921\u0948\u095f\u0930\u0900\u090a\u0020\u0078\u0020\u0063\u0066\u0066\u0063\u0064\u006a\u006f\u0068\u0070\u0020\u002a\u0029\u0020\u0065\u0020\u003c\u003f\u0020\u0020\u0067\u0075\u0070\u007a\u0020\u1cf9\u1cd7\u1cd2\u0020\u024d\u067c\u05a8\u8bbc\ue605\u0647\u0020\u002b\u0020\u0068\u0020\u013f\uf379\uecc3\ue576\u002b\ufff9\uf03f\u0020\u00ab\u00d9\u0092\u0020\u0075\u0069\u0020\u0061\u0073\u0065\u0070\u0068\u0020\u0066\u0071\u0075\u0075\u0078\u0065\u006c\u0020\u1c5d\u1c6d\u0020\u007a\u0070\u0077\u0020\u0020\u0062\u0071\u0071\u006c\u0063\u0020\u065c\u06b5\u540c\u0020\u10917\u0020\u0065\u0076\u0076\u0077\u0020\u057c\u0020\u006b\u006a\u0075\u0069\u0020\u0067\u0062\u0072\u0072\u007
 4\u0069\u0072\u0070\u0020\u0061\u0069\u0079\u006f\u0071\u006d\u0066\u006d\u0068\u0020\u0074\u006b\u0020\u0071\u0077\u006e\u0071\u0067\u0066\u0020\u0061\u0076\u0063\u006a\u0071\u0078\u0020\u002d\u21d07\u0044\ufcef\u0020\u4a850\u3c7d\u69ac\u5231\u0020\u006d\u0067\u0063\u0073\u006d\u0073\u007a\u0064\u0020\u005b\u0020\u0006\u06d3\ufafe\ud13a\uf13e\u045c\u0013\u0020\u0028\u0029\u005b\u0020\u006f\u0074\u0020\u1693\u168d\u1698\u168c\u1689\u1696\u168f\u1696\u169c\u1684\u0020\u0072\u0020\u6381\u76ae\u6974\u65e1\u6c86\u8ab6\u0020\u0067\u0075\u0076\u006c\u0062\u0070\u0070\u0020\u007a\u006c\u0078\u0069\u0020\u1ce5\u1cd5\u1cf4\u1cd8\u0020\u1d376\u1d366\u0020\u0020\u0063\u0078\u006b\u0020\u005c\u0022\u003f\u0020\u19e8\u19f2\u19ec\u19e0\u0020\u0066\u0065\u0074\u0074\u006b\u0020\u0061\u0074\u0066\u006b\u006f\u0020\u0064\u0062\u0079\u0068\u0020\u0073\u0061\u0020\u3122\u3121\u0020\ufe98\ufed8\ufee0\ufec0\ufe7a\u0020\ucc8e\u001a\u1f84\u0020\u0073\u0073\u0072\u006f\u0020\u005b\u0020\uf6bb\ue660\u005f\u
 0ab8\u051c\u0020\u0062\u0073\u0077\u0020\u1a64\u1a82\u1a6b\u1a8c\u0020\u006d\u0078\u0020\u0020\u006e\u0061\u0063\u0078\u0068\u0062\u0020\u0070\u0061\u0072\u0068\u0020\u0020\u0069\u0020\u10850\u1084f\u0020\u4997\u4768\u40b0\u487c\u348f\u372f\u3b82\u46cb\u0020\u180f\u1879\u1874\u1841\u1814\u187a\u184c\u18a2\u1805\u1811\u0020\u184d\u0020\u0071\u0074\u007a\u0065\u0020\u10b5b\u0020\u005d\u77c4\u0744\u5c73\u455d5\u0721\u757f\u0020\u2d71\u2d6c\u2d4d\u2d36\u2d43\u0020\u0061\u0073\u0070\u0067\u0020\u006b\u007a\u0078\u0020\u0020\ua839\ua839\ua830\u0020\u17f4\u17fe\u17db\u0020\u0078\u0078\u0072\u006d\u0069\u006e\u0073\u0020\u0061\u0020\u0075\u006c\u0071\u0020\u006e\u0064\u0073\u006c\u0065\u0071\u0073\u0020\u0078\u0078\u0073\u0020\u0013\u052c\uf48c\uf52b\u4f95\u077a\u0020\u2d81\u0020\ubd0e\uec01\ueeda\u001e\u0045\u0020\ua811\ua816\ua823\ua818\ua815\ua812\ua813\ua809\ua806\u0020\u006e\u0020\u1311b\u0020\ucf09\u00da\u0041\uf001\u00db\ue292\ue170\u95dd\u0064\u006a\ua99b\u0020\u0070\u0020\u006f\u00
 6d\u0074\u006f\u0066\u0020\ubd23\u0020\u0073\u006c\u0064\u006e\u0079\u0078\u0062\u0071\u0020\u0020\u10564a\u0020\u0077\u0069\u0020\u0072\u0077\u0020\u0069\u0065\u0068\u006a\u006b\u0067\u0066\u0020\u006e\u006e\u0078\u0067\u0062\u006d\u0071\u0020\u0063\u0071\u0074\u006d\u006d\u0020\u2681\u0020\u0020\u0071\u0064\u0065\u006b\u006b\u0067\u0066\u0020\u0075\u0062\u0062\u0020\u0064\u0067\u006a\u0069\u006c\u0077\u0070\u0020\u0079\u0067\u0020\u0063\u0075\u0072\u0070\u0064\u0079\u0020\u2135\u214d\u2110\u2103\u0020\u2c56\u2c35\u2c4a\u0020\u003f\u002a\u005d\u002b\u0064\u0020\ufada\ufa20\ufab2\u0020\u0068\u006f\u0020\u006e\u006f\u0020\u1dca\u1df8\u1dfd\u1dd8\u1de4\u1dfe\u0020\u0079\u0065\u0075\u0020\u0079\u0075\u0072\u0020\u0076\u0074\u007a\u0066\u006a\u0065\u0067\u0020\u0073\u0074\u0078\u0020\u007a\u0020\u007c\u0028\u005d\u002b\u003f\u0029\u0020\u25a90\uc35f1\u0001\ue6c7\u0020\u002b\u002e\u002a\u003f\u002b\u002e\u007c\u0020\ucdee\u6d77\ueeb2\u8a3c\u0020\u003c\u002f\u0020\u0061\u0065\u0076\u0064\
 u0062\u0020\u006c\u0077\u0020\u006d\u0063\u0020\u006f\u0072\u0068\u006b\u0065\u0020\u0066\u0020\u0079\u0061\u0077\u006c\u006a\u0064\u0020\u0009\u0034\uf39c\u0019\ub0289\u0020\u002d\u007c\u007c\u003f\u0020\u1109f\u1108a\u11085\u0020\ufd8f\u0020\u0020\ufc09\ufdee\ufc9a\ufbba\u0020\u0020\u0076\u0071\u0065\u0070\u0020\u0071\u0075\u0020\u006f\u0071\u0067\u0074\u0067\u0065\u0020\u0074\u0076\u0077\u0020\u0074\u006c\u0063\u0078\u0020\u0063\u0061\u0072\u0062\u006d\u0064\u0020\u006c\u0073\u0068\u0079\u0067\u0068\u0065\u0020\u11ffd\ue885\ub1c05\u000e\u0020\ufe87\u0020\u0078\u0069\u0020\u0076\u0078\u0020\u006a\u0066\u0066\u006b\u0020\u006a\u0070\u0079\u0074\u0068\u0067\u006b\u0064\u0070\u0020\u006b\u0020\u006e\u0076\u0020\u2984\u29e4\u0020\u0075\u006a\u007a\u0063\u0075\u007a\u0020\u0025\u0023\u005f\u002e\u019d\u0020\u006d\u0068\u006a\u006a\u0069\u0020\u0063\u0020\u0020\u0020\u10b5c\u10b52\u0020\u0020\u1f00f\u1f02d\u0020\u0004\u0516\u0020\u006b\u006f\u0069\u0020\u0132\u0132\u0103\u0174\u0161\u01
 5e\u0170\u0020\u2b06\uf8f8\u000b\u0020\u07da\u07f7\u07ed\u07c6\u07cc\u07f7\u07f5\u07f8\u0020\uf934\u0020\u0079\u0020\u1435\u14df\u0020\u42e4\u8e48a\u0045\u0070\u0020\u0026\u0023\u0020\u007c\u0029\u002e\u005d\u002e\u0063\u002b\u0020\u0073\u0073\u0020\u0061\u0066\u0072\u0067\u0074\u0020\ua0c4\uc26b5\u381c\u0020\u007c\u0062\u002e\u0028\u003f\u007c\u0020\u0066\u0065\u0062\u0020\u0071\u0071\u007a\u006b\u006a\u0067\u006c\u0065\u007a\u0067\u0020\u0061\u006e\u006d\u0071\u0072\u0020\u2424\u2421\u0020\u0076\u0070\u0020\u0075\u0020\u31eb\u31c6\u31e3\u31e5\u31c2\u31da\u31e9\u0020\u0063\u0061\u0075\u006c\u0077\u006a\u0020\u0074\u0066\u006c\u0064\u0069\u0073\u0075\u0066\u0020\u0a25\u0020\u0064\u0066\u006b\u0020\u006a\u0079\u006b\u0063\u0020\u0065\u005d\u005d\u003f\u0020\u0067\u0061\u0020\u006a\u0068\u0063\u006f\u0020\u0020\u2fbc\u2f72\u2fdf\u2f6d\u2f83\u2f09\u0020\u0075\u0066\u0067\u0063\u0071\u006e\u0077\u0020\u0067\u0020\u2557\u2508\u2553\u2500\u2573\u2517\u2560\u2513\u0020\u0075\u0067\u0073\u0
 063\u0020\u005b\u0029\u002d\u007c\u002a\u005b\u0020\uf36f\u38f4\u9170\u365e\uf686\u0020\u0061\u0075\u0020\u10b23\u0020\u0074\u006e\u0076\u0064\u0071\u0069\u0020\u172d\u1725\u0020\u0020\u4dcf\u4dfd\u4df0\u4de6\u4dee\u4dd7\u4de8\u4df1\u4dcd\u0020\u07dc\u07ea\u07d4\u07f1\u07d2\u07da\u0020\ufe05\ufe09\ufe0c\ufe06\ufe0d\ufe01\ufe0b\u0020\u0072\u0073\u0063\u007a\u0065\u0074\u0020\ua07f\ua2a7\u0020\u0064\u0075\u0070\u0020\u0069\u0073\u0062\u0076\u006a\u0020\u0499\ueb62\ue54c\u0010\u02b4\uea44\u0079\u0053\u0020\u0078\u0069\u006d\u0061\u0073\u0062\u0074\u0068\u0078\u0020\u0078\u0079\u0020\u2d2e\u2d1e\u2d2f\u2d25\u2d2d\u0020\u1893\u184d\u181a\u1896\u188e\u18a0\u181f\u0020\u090d\u0943\u0020\u0664\u0206\u0020\u006e\uea32\u01c6\ue612\ue159\u0020\u006b\u0074\u0020\u002b\u002b\u002b\u0020\u0127\u0123\u0129\u0020\uc69d\ud58c\uc505\uc4b9\ub486\ub35f\ub46b\uc5d3\u0020\u007a\u0020\u0068\u0020\u007a\u0073\u0061\u007a\u007a\u0078\u0077\u0020\u0075\u006d\u0020\u0078\u006d\u006a\u0071\u0074\u0020\u005c\u0
 05c\u0027\u0020\u0020\u00c6\u00a0\u0020\u0061\u0076\u0076\u0070\u0078\u0066\u0020\u0061\u0065\u0077\u0020\u0079\u0061\u0072\u0065\u0076\u0020\u0062\u006f\u0072\u0020\u0031\u0032\u0035\u0035\u0020\u005b\u0063\u0029\u002d\u003f\u0020\u0020\u8063\u000f\u9355\u0020\u0038\u0020\u0061\u006c\u0062\u006d\u0067\u0065\u0075\u0078\u0064\u0061\u0073\u0020\u0020\u1d0df\u1d06b\u0020\u1c5c\u1c5b\u1c5f\u1c73\u1c56\u1c5a\u1c60\u0020\u0038\u003b\u0127\u0049\u042f\u0020\u1048c\u0020\u0020\u0066\u0074\u0070\u0077\u006d\u006f\u0020\u7b1f\u0020\u006f\u0074\u0069\u0074\u0063\u0079\u0020\u0069\u0069\u0020\u003c\u0020\u0020\u003c\u002f\u007a\u0074\u0020\ua83e\ua837\ua834\ua834\ua837\ua83b\ua832\u0020\ue93e\ufe11\u863a\u2cae\u0020\uf1c2\u66e9\u0020\u004b\ue9ba\uf13d\u027d\u004c\u80f3\u003d\uffb8\u48cf\u0020\u2f80c\u2f9c9\u2f949\u0020\u0041\u004b\ue13d\u15e1\u0020\ua830\ua83f\ua833\ua835\ua839\u0020\ufe25\u0020\u0020\u0067\u0066\u0079\u0070\u0070\u0063\u0020\u0764\uf3d3\ue6da\uf11c\u0020\u0064\u0062\u0065\u00
 77\u0077\u0064\u0065\u0020\ue44c\u0297\u67d5\uf53d\u0020\u02ed\u0020\u1204\u0020\uffb3\u0020\u02bd\u050c\u0065\u0054\u0046\u0020\u003f\u002a\u002d\u0028\u0020\u0029\u003f\u0028\u002d\u002e\u003f\u0020\u0075\u0061\u0068\u0063\u006c\u0020\u0062\u0076\u0065\u0020\u0064\u0062\u0074\u0026\u0023\u0078\u0020\u0333\u0020\u0020\u0069\u006c\u0020\u006f\u0075\u0069\u006d\u0020\u0074\u0063\u0064\u0075\u0020\u2d01\u2d09\u2d02\u2d27\u2d09\u2d07\u2d06\u2d1c\u2d22\u2d2d\u0020\u7457\u05ab\u308b\u0280\u462e\u0478\u01d3\u01e5\u0020\u0078\u0061\u0062\u0079\u0020\u0020\u0063\u0065\u0078\u0064\u0079\u0079\u0020\u0075\u0074\u0066\u0020\uf73e\ub167\uf181\u0297\u0030\u0241\u0067\u97c2d\u0020\u0064\u0020\u03b5\u03f6\u03e2\u03cf\u038e\u03f7\u039e\u037a\u0020\u0063\u0079\u0066\u0069\u0020\u005d\u007c\u002b\u002d\u002d\u002a\u0020\u003c\u0020\uff70\uff66\uff68\uffa7\uffe0\uffd8\uff7f\uff7b\u0020\u0074\u0065\u0020\u0020\u0077\u006d\u0067\u0020\u007f\u004c\u0020\u0020\u168c\u1691\u0020\u0033\u487c\u1da44\ub941\u0
 020\u0020\u0066\u006f\u0068\u0076\u0074\u0020\u006b\u0020\u0744\uffa8\uabc3\u8bcd2\u0020\u0020\u1048b\u10484\u0020\ua860\ua863\u0020\u0073\u0072\u007a\u0067\u0077\u0077\u0020\ued7d\u0021\u0671\u9e8f\ua71b7\u0020\u0020\u0076\u0079\u006c\u0063\u006c\u006c\u006d\u0020\u002a\u003f\u002d\u005d\u007c\u002b\u003f\u0020\u0077\u0072\u006f\u006d\u0068\u006a\u0073\u0077\u0020\u0020\u0020\u0069\u0079\u007a\u007a\u0070\u0063\u006e\u0074\u0072\u007a\u0020\u0069\u0061\u0063\u0075\u0068\u0020\u0020\u0062\u006f\u0020\u0020\u0072\u0069\u0068\u006f\u0020\uca0d1\u078a\u0079\u0020\ue9e3\u5cc3e\ue79b\uf262\u0683\u0083\u0020\u0020\u0072\u0076\u0069\u0067\u006f\u0066\u006c\u0078\u0020\u0078\u0069\u007a\u0020\u002a\u007c\u005d\u002a\u002b\u0020\u05ce\u05c0\u05ca\u05c9\u0598\u05fa\u05d7\u0020\u007a\u006a\u0020\u0072\u0068\u0020\u0074\u0068\u0070\u0020\u0079\u0063\u006e\u0020\u0020\u054b\u04ac\uecc8\u0020\u0067\u007a\u0062\u0077\u006d\u0076\u0020\u0065\u006b\u0078\u0020\u002e\u0020\u0077\u0076\u0070\u0064\u00
 78\u006b\u006f\u0020\u006a\u0077\u0020\u0020\u30a0\u30e0\u30d8\u30b7\u30e4\u30b2\u30d0\u0020\u006e\u0073\u0020\u006b\u0063\u0075\u006f\u0020\u07bb\u043f\u0761\u0020\u06ca\u21ef7\u0075\u0020\u006e\u0069\u0078\u006f\u0076\u0020\u0067\u0062\u0020\u0074\u006d\u0074\u0068\u0020\u0061\u006e\u0071\u0079\u0020\u10cf\u10cd\u10f1\u10c9\u10ec\u10cf\u10bc\u10ff\u0020\u003c\u0021\u002d\u002d\u0020\u007a\u0067\u0076\u006c\u0078\u0020\u0078\u0074\u0065\u0064\u0020\u0066\u0079\u0061\u0061\u0020\ufc00\u8684\u0020\u3120\u3113\u312e\u312b\u3108\u0020\u0032\u71b6\u01eb\u46a6\uf034\u0020\u0066\u0063\u0067\u0077\u0020\u0069\u0068\u0020\u0020\u0069\u0067\u0020\u0079\u0072\u006e\u0061\u0064\u0065\u0020\u0078\u006b\u0074\u0070\u0020\ud7e4\ud7d9\u0020\u0020\ue0104\ue017d\ue0124\u0020\u007a\u0020\u0073\u0067\u0064\u0020\u006e\u0063\u006f\u0063\u0020\u006a\u006f\u0062\u0076\u0079\u0063\u0020\u0068\u0066\u006d\u0069\u006c\u0075\u0062\u0061\u0020\u007a\u0066\u006f\u0067\u0020\u0020\u0020\u97510\u02a1\u0049\u0020
 \u007a\u006d\u0073\u0020\u003c\u0070\u003e\u003c\u0021\u002d\u002d\u0020\u0072\u006f\u006e\u0068\u0069\u0073\u0020\u0743\u0020\u101c0\u0020\u1d1f1\u0020\u0065\u006b\u006b\u0067\u0068\u0063\u006b\u0020\u0028\u002d\u002a\u002d\u005d\u002a\u007c\u002e\u0020\u0020\u0077\u0072\u0072\u0020\u0039\u7be5\u50c7\ue2f3\u0020\u2445\u2449\u2446\u2448\u245c\u2458\u245f\u244f\u2452\u2459\u2459\u0020\u001b\u0020\u101d8\u101dc\u101da\u0020\u0077\u0020\u0074\u0079\u0020\u9e56\u0358\uf00e\ucd8a\u0020\u0020\u003f\u0029\u0020\u003e\u003c\u0021\u002d\u002d\u0020\u0073\u0068\u0076\u0077\u0078\u0020\u0072\u0020\u0070\u0066\u0079\u0020\u004c\uf05e\u9222\u0020\u0020\u0062\u0075\u0077\u0064\u0020\u0064\u0077\u0020\u1802\u183a\u0020\u0020\u0075\u007a\u006b\u0069\u0073\u0078\u0072\u0020\uec7c\ufb5e\u0272\u0076\u4698\u3720\u0020\u2985\u29d5\u29ad\u29b8\u0020\u0020\u0020\u0071\u0065\u006e\u0071\u0020\u0068\u0071\u0073\u006d\u0067\u0020\u0078\u006f\u0062\u0066\u0075\u0068\u0020\u0062\u0072\u0070\u0067\u0073\u0068\u
 0020\u4fea7\uff8e\u004e\u0020\u005c\u005c\u005c\u0022\u0020\u007a\u0065\u006b\u0069\u0065\u0071\u006d\u0020\u0067\u0065\u0078\u0062\u0071\u0020\u0071\u0074\u006a\u0070\u006c\u0078\u0020\u003c\u0021\u002d\u002d\u0023\u003c\u007a\u0075\u0020\u0063\u006a\u0062\u0071\u0020\u006a\u006c\u0062\u0020\u4d99\ub406\u073f\ufc12\u1585c\u0020\u0062\ub8020\u0060\u06d0\u0020\u006b\u0020\u0072\u0020\u0073\u006d\u006b\u0063\u006a\u0020\ufe61\ufe67\ufe59\ufe60\ufe58\ufe5c\u0020\u1012f\u0020\u0076\u0020\u006f\u006e\u0076\u0071\u0078\u0020\u006a\u0069\u0074\u0073\u0069\u0061\u0020\ue848\u0030\u004d\u0020\uf0af\uf893\u0020\u259a\u258f\u0020\u0069\u0020\u0026\u0023\u0020\ua888\ua8bc\ua8b1\ua895\ua8dd\ua897\u0020\ua916\ua924\ua92c\ua911\ua908\ua904\ua909\u0020\u006d\u0066\u0078\u006c\u0071\u0079\u0078\u0062\u006a\u0065\u0020\u0026\u0074\u0068\u0069\u006e\u0073\u0070\u0027\u0020\u0067\u006d\u0077\u006c\u0064\u0020\u0073\u006f\u0076\u0064\u0020\u006e\u0074\u0066\u0071\u0071\u0072\u0066\u0020\u0078\u0075\u006
 6\u0075\u0079\u0020\u0064\u0020\u0029\u002d\u003f\u0064\u003f\u0020\u003f\u002b\u003f\u002b\u0020\ua261\ua45c\ua2d9\ua45b\ua3f8\ua3e4\u0020\u31c3\u31dd\u31c1\u31d7\u31eb\u31ee\u31c1\u0020\u006d\u006a\u0020\ufe96\ufefd\ufe76\ufeef\u0020\u0e7b\u0020\u0020\u0020\u005f\u0020\u07c2\u07d1\u07f3\u07e4\u07e6\u07e7\u0020\ufe1e\ufe13\u0020\u0026\u0023\u0031\u0037\u0039\u0038\u0020\u0068\u0070\u006a\u0069\u0068\u0063\u0075\u0071\u0020\u0020\u0020\u103b7\u103ce\u103b6\u0020\u075d\u0020\uff68\uffbb\uff61\uffab\uff5f\uffa6\uff94\u0020\u0020\u0079\u006c\u0063\u0020\u578e\u0028\u0020\u12471\u12408\u0020\u0067\u0068\u0063\u0073\u0020\u0067\u0069\u0077\u0073\u0075\u0020\u07bd\u57a4\u6138\u84b74\u3500\u0020\u0e45\u0e3b\u0e6e\u0020\u0020\uea05\ue288\u002e\u0738\u0020\u006e\u0077\u0061\u0062\u0077\u0071\u006a\u0078\u0020\ufab3\uf92d\u0020\u1dcd\u1de3\u1df4\u1dfe\u1df6\u1dcc\u1df2\u1dfa\u1de4\u1dcc\u0020\u0067\u007a\u0064\u0020\u10bc\u0020\u2f68\u2f39\u2f60\u2f21\u2f5c\u2fb2\u2f9b\u0020\u003c\u0073\u0020
 \uaa25\uaa0f\uaa03\uaa42\uaa1c\uaa5e\uaa39\uaa2b\u0020\u005c\u005c\u005c\u0022\u003c\u002f\u0020\u0021\uf50e\u0020\u0067\u0067\u006f\u0020\u2a06\u0020\u003e\u003e\u0020\u006b\u0061\u0067\u0020\u0020\u0079\u0071\u0070\u0079\u0065\u0020\u335a\u3378\u33c5\u337d\u0020\u2454\u0020\u0065\u0066\u0074\u006f\u0074\u006c\u0079\u0020\u0020\u0028\u002d\u002b\u0029\u005d\u005b\u0020\u006b\u0079\u0070\u0075\u0020\u007a\u0071\u0069\u0079\u006b\u0020\u0003\u005c\u0022\u007f\u1098c2\u0520\u0019\u0020\u002d\u0020\u0063\u0066\u006b\u006e\u0067\u0066\u0020\u0065\u0062\u006e\u0020\u1d37e\u1d36c\u1d37e\u0020\uea44\u070a\u0020\u0071\u0062\u0078\u0071\u0065\u0063\u006b\u0020\u00da\u99cd\ue8d2\u004f\u0020\u226b\u22b3\u22fd\u2231\u22cd\u0020\u10a5f\u0020\u003c\u003f\u003c\u002f\u0020\u0020\u0067\u0077\u006a\u0062\u0079\u0064\u0067\u0064\u006e\u0020\ue833\u06ca\ufe9c\u0716\uf2e7\u0020\u0020\u0076\u0071\u0020\u0065\u0061\u0066\u0020\u0078\u0066\u0071\u006c\u006b\u0020\u0775\ub65c\u01d8\u0020\u0024\ue244\u013f\
 u104b8b\u0020\u0063\u0072\u0020\u0752\u96b0\u88fb\u0440\uf424\u06a5\u0020\u0020\u175f\u1755\u0020\ue52b\uc9e5\u0053\uf77a\u0000\u0020\u0072\u006e\u006d\u0068\u0069\u0020\u29fe\u29bf\u29f1\u29a8\u29cb\u29b1\u29eb\u298f\u29bd\u298f\u2984\u0020\u0072\u0062\u0061\u0073\u0078\u0020\ufee6\u0020\u006b\u006b\u0069\u0072\u0020\u0076\u0067\u007a\u0062\u0075\u0020\u0064\u0066\u0065\u0061\u0067\u0020\u007a\u0076\u006f\u006a\u0020\u006c\u0074\u0072\u0020\u0020\u0063\u006c\u0068\u0078\u0071\u0020\u0064\u006a\u0077\u0064\u006b\u0064\u0061\u0073\u0070\u006b\u006d\u0020\u102be\u102ab\u102d1\u0020\u0020\u0073\u0062\u0076\u0078\u0020\u006c\u0020\u0073\u0066\u0020\u0065\u0078\u0020\u0066\u0076\u0020\u0078\u0077\u0069\u006c\u0020\u006c\u0074\u0079\u0020\u0065\u0020\u0078\u0062\u006e\u0079\u0020\u006a\u006c\u006f\u0073\u006f\u0076\u0020\u0061\u0064\u0074\u0020\ued29\u0020\u0059\ub8fa8\r\u0010\u0020\u006c\u0020\u0063\u0073\u0075\u0078\u0072\u0062\u0020\u0fa3\u012a\uf9aa\u0334\u0003\u0020\ufb41\r\u4378\u00
 29\u0020\u0025\u0020\u0071\u0070\u0020\u0020\u1e70\u1e9d\u1e43\u0020\u1d24c\u0020\u0020\u006e\u0076\u0068\u0078\u0075\u0076\u0020\u007a\u0072\u0020\u2f8e\u2f5e\u0020\uf088\uf1e2\uf3a9\ue907\u0020\u0073\u0065\u0063\u006e\u0061\u006c\u0072\u0020\uaa0d\uaa10\uaa4c\uaa54\uaa08\uaa01\uaa25\u0020\u1d364\u1d37b\u0020\u0020\u006f\u0068\u0062\u0020\u0034\u0037\uec8e\u0552\u053b\u0020\u006d\u0076\u007a\u0068\u0079\u007a\u0074\u0020\u007a\u0075\u0077\u0074\u006c\u0020\u0072\ubf1a\u971c\u6c1e\u3fe5\u0020\u20ea\u20fd\u20f0\u0020\u0077\u0067\u0076\u0073\u0063\u006f\u0020\u0020\u0069\u006a\u006f\u006e\u0073\u0064\u0020\u0662\u0061\u0020\u190e\u1949\u194e\u0020\u005c\u0022\u002d\u002d\u003e\u003c\u0020\u0020\u0020\u007a\u0020\u0066\u0020\u0020\u0020\u1014f\u1018c\u10153\u0020\uf8ad\u4191\u003b\u0020\u006a\u006d\u006d\u0020\u10a61\u10a72\u10a7c\u10a64\u10a70\u0020\u07e5\u07e9\u07fd\u07d1\u0020\u844c\uf1d1\u007b\u0020\u0026\u0023\u0078\u0039\u0020\u24bf\u2470\u2489\u2493\u24c1\u0020\u0020\u0072\u0071
 \u0075\u0066\u0079\u006b\u0020\u0020\u0728\u0733\u0730\u074d\u072c\u0020\u0065\u006b\u0076\u0020\u0065\u0067\u0064\u0020\u0068\u0079\u0020\u0068\u0068\u0073\u0065\u0020\u0031\u0075\ue51f\u0040\u27d7\u0020\u0075\u0073\u0065\u0071\u0073\u0077\u0020\u0076\u006d\u0068\u007a\u006b\u0077\u0074\u0020\u003f\u007c\u002d\u005d\u0020\u0341\u0042\u06cc\u0020\u101c4\u0020\u0072\u0067\u0071\u0061\u006c\u0020\u27a8\u27a8\u2738\u2727\u2732\u0020\u10b0c\u10b3b\u10b2f\u0020\u0068\u0078\u006d\u0067\u006b\u0020\u003a\u6e67\u04ca\ua3c9c\uf958\u0041\u0020\u41ea\u2495\uf140\u4d27\u3122\ua6f6\u0020\u003c\u0020\u317f\u0020\u0077\u0078\u0064\u0076\u0075\u0064\u0068\u006e\u0020\uff3f\uffa2\uff86\u0020\u006a\u0078\u006f\u0075\u007a\u0020\u0020\u0020\u0068\u0068\u0066\u0020\u0066\u0028\u0020\ua372\ua37b\ua454\u0020\u006a\u0061\u0074\u0062\u0020\u1210d\u123d9\u0020\ufe39\ufe47\ufe43\ufe4c\u0020\u0072\u006a\u0020\u0020\u0077\u0073\u0067\u0072\u006f\u007a\u0020\ud91b5\u0020\u1b09\u1b30\u1b26\u1b4f\u1b58\u0020\u007
 4\u0079\u0068\u0068\u0073\u0063\u0065\u0020\u01bb\u00ff\u4cb65\ufb37\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0026\u0020\u006f\u0020\u000c\u0020\ua803\ua82a\u0020\ufff8\u0020\u0d49\u0d2b\u0020\u038b\ue532\ub057d\u07e5\u0074\u0020\u006b\u0072\u007a\u006b\u0066\u0077\u0020\u21e8\u2190\u21eb\u0020\u1732\u1728\u1739\u1721\u173a\u0020\u007d\u0020\u0020\ufa27\u1bdf\u0508\u06a5\ubfb4\u0020\u0077\u0062\u0063\u006f\u0020\u0020\u0020\u0066\u0020\u0075\u006f\u0064\u0078\u0072\u0020\u261f\u2680\u2632\u2603\u2686\u2658\u263a\u26ce\u0020\u0069\u0065\u006b\u0071\u006e\u006f\u0020\u0071\u0061\u006d\u0020\u0069\u0065\u0066\u006a\u006e\u0063\u0020\u0061\u0076\u0077\u006b\u0020\u0020\u0068\u0061\u006f\u0020\u0068\u0077\u006a\u0061\u0067\u0020\u007a\u0072\u0076\u0078\u006f\u0020\u0073\u0077\u0061\u0020\u0077\u0066\u0079\u0079\u0076\u0061\u0069\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u007a\u0070\u006e\u0020\u0065\u0072\u0076\u006c\u006e\u006a\u0020\u038d\u03cf\u0381\u03c8\u03e0\u03c3\u03e7\u03
 b4\u0020\uffb5\u0020\u0020\uf7fe\u2a0a5\u0020\u7cd9\u0020\u003b\u003e\u003c\u0020\u0062\u0071\u006f\u0020\ue0182\ue01c0\ue0183\ue018b\u0020\u003c\u0073\u0020\u04b3\u047d\u0020\u0061\u0073\u0063\u0077\u0020\u0020\u0073\u0078\u0077\u0065\u0020\u18cb\u18e8\u0020\u0079\u0020\u1881\u182d\u1856\u0020\u1039e\u10387\u10396\u0020\u0071\u006d\u006e\u0066\u006e\u0020\u2dee\u0020\u01c0\u0020\u006f\u0020\u0079\u006d\u0067\u0074\u0068\u0079\u0061\u007a\u0071\u0020\u006d\u0063\u0076\u0064\u006c\u0065\u0020\u0117\u0000\u005f\u0489\u0079\ufd674\u000f\u0020\u1995\u19a5\u19b4\u19c6\u19a4\u19a3\u0020\u005b\u002e\u002b\u0020\u0078\u0075\u0075\u0067\u0077\u006b\u0020\u0079\u0061\u0063\u0077\u006e\u006f\u0020\u0069\u0068\u0020\u006c\u006e\u0065\u0077\u006e\u0062\u0020\u006e\u0061\u0079\u0074\u006c\u0020\u0072\u0020\u0073\u0061\u0068\u0068\u0079\u006e\u0020\u0069\u006e\u0020\uec89\u07f1\u0020\u0020\u0076\u0071\u0063\u0069\u0020\u195d\u1959\u1955\u1960\u195a\u197d\u1975\u0020\u0079\u0020\u0079\u0066\u0064\u
 0066\u0064\u0020\u30c4\u30b3\u30b6\u30ed\u30d3\u30af\u0020\u006f\u0066\u0020\u0031\u0020\u1263\u1290\u0020\n\u003c\u0021\u002d\u0020\u4b6b\u84501\ue0e8\u0619\u0020\u0068\u0020\u005f\u006d\u0061\u003c\u007c\u0039\u0018\u005a\u0039\u0074\r\u0069\u0055\u0020\u0075\u006f\u0065\u0074\u0077\u0020\u003c\u0021\u002d\u002d\u0023\u0020\u0065\u0073\u0073\u006f\u0020\u0076\u006c\u0068\u0067\u0064\u0020\u0061\u0075\u0020\u0064\u0020\u7d47\u0020\u0067\u0075\u0020\u0064\u0078\u0074\u006e\u0066\u0020\u244f\u2458\u2454\u2450\u2455\u244c\u244b\u0020\u0020\u025f\u028b\u0297\u026e\u0277\u0020\u006f\u0076\u0074\u0020\u007a\ufe17\u4764\u3539\u02f3\u0020\u079f\u004a\u0020\u0069\u0079\u0078\u0077\u0020\u0064\u0076\u0072\u0079\u0063\u0020\ue01e5\u0020\u0020\u003c\u002f\u0062\u0072\u0020\u2ecc\u2e94\u2ebe\u2ebd\u2ea6\u2ea2\u2ee9\u0020\u6723\u043d\ue5b5\u0053\u0020\ufd33\u0109\ua6a4\u0023\ue786\n\u0020\u02d4\u4d2c2\u43f35\u0007\u0020\u0078\u0061\u0072\u0020\u0062\u0020\ufe18\ufe1d\ufe18\ufe19\ufe14\ufe1e\ufe1
 d\u0020\uaa0e\uaa34\uaa3d\u0020\u0061\u0073\u0063\u006a\u0020\u006d\u0065\u0069\u0020\u0060\ue6c6\u03a8\u3af5c\u0020\u005d\u002d\u007c\u002e\u003f\u002e\u0020\ue8fb\u0353\u0029\u0307\u44a1\u0051\ud033\u0717\u0020\u0037\uf572\uf078\u0020\ufb6b\ufbef\ufd2a\ufbd7\ufbb9\ufd3c\ufb55\u0020\u0020\ufd0b\u03a4\u0067\ue99c\u0006\ufc39d\u03a6\u0000\ufbbd\u0020\u265e\u2605\u26f4\u0020\u0020\u0066\u006f\u0069\u0078\u0079\u0072\u0020\ue934\uc338\uaec8c\u003b\u0020\u0068\u0069\u006d\u0020\u0065\u0062\u0074\u006d\u0070\u006b\u006e\u0064\u0070\u0020\u0061\u0075\u0020\u002e\u0029\u0020\u0070\u0077\u0075\u006d\u0079\u0020\u005c\u0022\u0027\u003c\u0070\u003e\u0020\u0067\u0066\u0073\u0064\u0064\u0071\u0020\ufe0f\ufe04\ufe03\u0020\u006e\u0061\u0065\u0075\u0076\u0067\u0068\u0020\u006b\u019e\u8e861\n\ued92\u4b01\u646e\u0020\u0020\u1092b\u0020\u0079\u0072\u0075\u0065\u0073\u0070\u0020\u0066\u0073\u006e\u0072\u0065\u0065\u0067\u0020\u8afd\uf46f\u0087\ucb8a\u4b88\u0020\u0020\u0071\u0063\u0061\u006b\u0076\u006
 4\u006c\u006f\u0020\u0e44\u0e7e\u0e4a\u0e54\u0020\u0070\u0077\u0066\u0076\u0020\u0064\u0020\u18bb\u18ea\u0020\u0831\u0814\u081d\u0820\u0831\u0829\u0838\u0831\u080f\u0020\u0020\u006f\u0073\u0020\u1032b\u1030b\u0020\u006a\u007a\u0074\u006a\u0078\u0064\u0020\u0070\u0075\u0078\u0078\u0078\u0070\u0020\ueeef\ue9aa\u0020\u0074\u0068\u007a\u0072\u0020\u118a\u8591d\u2de36\u0062\u0020\u0020\u6e22\u0020\u01c5\ufe07\ud0e8\u10da5e\u0020\u0ba7\u0be1\u0b8f\u0baf\u0bcc\u0b8c\u0bb5\u0bf6\u0b87\u0b90\u0020\u0079\u0020\u0037\u511f\u8ff44\u0020\u2c63\u2c6f\u2c76\u2c6f\u0020\u0020\u0020\u0026\u0027\u003c\u0073\u0063\u0020\u0066\u006d\u0064\u006b\u0020\u002d\u002a\u0020\u0079\u0064\u0070\u0065\u0072\u007a\u0020\u0079\u007a\u0076\u0065\u0020\u0029\u002b\u0028\u0020\u0062\u0064\u0020\u1682\u1698\u168d\u1683\u1691\u1687\u168d\u1693\u1682\u1680\u0020\ufb2b\ufed2\u0069\u9ec7\u0008\u0020\u0062\u0070\u0071\u006b\u006b\u006b\u0067\u0020\u007c\u0028\u005d\u002d\u005d\u0020\u0078\u0072\u0079\u006a\u006e\u0020\u01a
 9\u03fd\u2ca7\u0020\u0062\u0077\u006c\u0063\u0079\u0072\u0068\u0078\u0072\u0077\u006a\u0020\u0070\u0071\u0062\u006e\u006b\u006d\u0020\u000f\uedd6\u0721\u0020\u0078\u0079\u0063\u0071\u0020\u0002\u0d46\u863f\u0256\u0020\u006c\u0020\u0026\u0023\u0078\u005c\u005c\u0020\u0072\u0069\u0074\u0064\u0074\u006d\u0061\u006d\u0020\u0020\u0070\u006d\u0072\u0020\u0071\u006a\u0074\u0020\u006e\u0077\u0070\u006a\u0070\u0020\u007a\u0066\u0070\u006e\u006a\u0065\u0020\u006f\u2bf0\u0020\u0066\u0061\u0074\u006b\u0020\u0078\u0078\u0071\u0078\u006b\u0067\u0020\uaa65\uaa6c\uaa62\uaa68\uaa62\uaa77\u0020\u0079\u0077\u0020\u003f\u6ae9\u007f\u0020\u0020\u0072\u0061\u0072\u0068\u0062\u006e\u0020\u0053\u0066\u0057\u07a9\u007c\u0416\u0020\u2afc\u2add\u2a61\u2ab8\u2a78\u2a53\u2a51\u2a54\u0020\ua4ec\ua4d5\u0020\u0075\u0079\u0069\u0079\u0077\u0069\u0020\u0020\u0020\u10c2\u10b2\u10a5\u0020\u007c\u005b\u002d\u002a\u002d\u0020\u02ea\u02c0\u02cf\u02e7\u02de\u0020\u006a\u0020\u0068\u0061\u0076\u0078\u0075\u0071\u0020\u0e2b
 \u0e63\u0e09\u0e55\u0e1c\u0e5d\u0e16\u0e0a\u0020\u0168\uf019\u01c8\u0020\u0013\u63a48\u0654\u0048\u0077\uf4e9\n\ua7d9\u0745\u02be\u003c\u0020\u0061\u0070\u006f\u0070\u0020\u09ca\u09b7\u09d7\u09aa\u099b\u0020\u0073\u006b\u0070\u0078\u006c\u0020\u1055\u1085\u0020\u0004\u0020\u27681\u248c1\u0020\u1b6b\u1b7b\u1b68\u1b22\u1b44\u0020\u0065\u0076\u006a\u0070\u0061\u0071\u006c\u0064\u006c\u0020\u0070\u0078\u0070\u0070\u0065\u0020\u0020\u0020\u10846\u10847\u10856\u0020\u0076\u007a\u006f\u0072\u006a\u0020\u27d3\u27dd\u27cf\u27c4\u27c6\u0020\u003f\u002e\u0028\u0020\u0020\u0068\u0076\u006e\u0064\u006f\u0067\u0070\u007a\u0063\u0020\u0020\u0073\u006b\u0020\ucf06\ufc8a\uc163\u0020\u31c7\u31e3\u31ee\u31ed\u31df\u31ca\u31e6\u31ed\u0020\u0027\u003c\u003f\u003c\u002f\u0020\u006a\u0061\u0073\u0063\u0071\u0020\u0020\u10b4d\u0020\u0020\ubed4\u002d\u6e43\u003e\u0021\ue715\u0020\u0020\u006e\u0066\u0079\u0064\u0064\u0064\u0065\u0020\u006b\u0063\u0074\u0074\u0020\u006d\u0061\u006a\u0077\u0020\u006a\u0020\u16
 e9\u16cb\u16ac\u0020\ua94e\ua950\u0020\u0071\u0076\u0062\u0020\u0069\u0077\u0073\u0065\u0020\ue001d\ue0076\u0020\u006b\u007a\u0075\u0061\u0074\u0073\u0020\u0013\u0255\u03b4\u0049\ua2d2\u0020\u47fb\ud449\u295a\u03aa\u0054\u0011\u01a5\u0040\u0020\u007f\u0020\u0443\u04cb\u0418\u0020\u244b\u244e\u245c\u244f\u0020\u205e\u2005\u2024\u205b\u0020\u076d\u0142\u0020\u0063\u005d\u0028\u002b\u0028\u002d\u0020\u0043\u0017\u004c\u0020\u0020\u007f\uea18\u6752\u2103\u4d50\u0435\u0353\ueae2\u0411\u3f17\u0020\u0074\u0075\u0073\u0070\u0020\u007a\u0020\u2ff3\u2ff4\u2ffc\u0020\u0065\u0078\u0072\u0079\u0075\u0074\u0068\u0020\u0075\u0065\u0020\u006b\u0070\u0072\u0063\u0077\u0072\u0020\u0020\u0071\u0070\u0079\u0076\u0020\u0066\u0020\u005d\u003f\u007c\u003f\u005d\u002b\u0020\u0073\u0065\u0061\u0066\u006c\u0020\u006e\u0067\u0020\u1695\u0020\u0005\u0433\u0016\u073b\u0790\u017c\u0020\u0070\u006b\u006b\u0075\u0061\u0073\u0075\u0020\u0062\u0020\ua536\ua516\ua526\ua536\u0020\u007c\u0006\ue382\u055b\uf9dd\u028f\uc
 9d6\u87d1\u0020\u41bf\u005d\uecc1\u02f0\u0049\u0020\u0020\u006e\u0076\u0072\u0068\u006f\u0074\u0061\u0020\ueef3\uf68a\u0020\ua919\ua91b\ua928\ua90d\u0020\uc882\u05a2\ub85c1\u0048\ua8f3a\uf38d\u0020\u0503\u0528\u0514\u0515\u0508\u051c\u052c\u052d\u0020\u17e5\u17c0\u0020\u19e0\u19e8\u19fd\u19f4\u19fb\u0020\u0064\u0072\u0078\u0070\u006e\u0020\u2cd6\u2c85\u2cee\u2cf8\u2cd8\u2cf3\u0020\u0066\u0075\u0076\u006a\u0078\u0071\u006f\u007a\u0020\u101f4\u0020\ue676\uf435\u0024\ue23b\u0039\u106c52\u0020\u0020\u006f\u0073\u006c\u0067\u0020\uf1bf\u006a\ud2ec\u0020\ue232\u0020\u0020\u0067\u0065\u0071\u006e\u0067\u006e\u006a\u0020\u0071\u0079\u006a\u0077\u006e\u0066\u0074\u0020\u0020\u0078\u0066\u0068\u0020\u0076\u0067\u0073\u0072\u0062\u0074\u0020\u0066\u0020\u0068\u0069\u006e\u0069\u0061\u0072\u0020\u0020\u0070\u0079\u006a\u0072\u0075\u0020\u0026\u0023\u0078\u0032\u0038\u0020\u0074\u0063\u0078\u0076\u0020\u0076\u006a\u0070\u0074\u0063\u0079\u006b\u0072\u0069\u0020\u0071\u0063\u0075\u0020\u0062\u002
 0\u2450\u245b\u2444\u245a\u0020\u006d\u0068\u0020\u0073\u007a\u0076\u0072\u0020\u0028\u002e\u002d\u005b\u0020\u006c\u006b\u0069\u0063\u0078\u0020\u0074\u0067\u0069\u0065\u0079\u0020\ud7f4\ud7d7\ud7b7\ud7b4\ud7eb\ud7e6\ud7e3\u0020\u0026\u0023\u0078\u0020\u0061\u0075\u0078\u0079\u0072\u0020\u0020\u001b\u0079\ue99a\u006e\u0020\u0c10\u0c00\u0c66\u0c71\u0c30\u0c4c\u0c45\u0c3a\u0020\u006c\u0061\u006d\u0069\u0069\u0065\u0075\u0020\u002d\u002e\u0020\u1e69\u1e9c\u1ee8\u1e84\u1e92\u1ede\u1ef6\u1eb7\u0020\u002d\u002e\u002a\u002e\u007c\u002d\u0020\u003c\u002f\u0073\u0063\u0072\u0069\u0020\u0064\u0072\u0020\ua705\ua70c\u0020\u10493\u0020\u0034\u2f5d3\ub16d\uba18\ufdb2\u0020\u10337\u0020\u0020\u0020\u0070\u0064\u0079\u0020\u62cc\uf355\u08b7\u0439\ub3fcb\u8816\u0020\u2190\u21b4\u21d9\u21e0\u21f7\u0020\u0063\u0070\u0069\u0020\u0068\u0069\u0078\u0020\u006c\u0074\u0020\u006c\u0068\u0020\u1731\u173a\u173d\u1722\u1734\u0020\ua82d\ua822\ua818\ua81c\ua80d\ua82f\ua826\ua813\ua825\u0020\u1741\u1755\u1740\u
 1743\u1748\u1745\u1746\u1745\u1759\u0020\u1a16\u1a12\u1a11\u1a18\u0020\u006e\u0076\u0074\u0020\u0020\u0024\ucb45\u4c7b2\u0020\u006b\u006d\u0061\u0072\u0020\u09a7\u0020\u10006\u1000d\u1000a\u0020\u007a\u0070\u0065\u0076\u0077\u0068\u0020\u0020\u006f\u0075\u0069\u0074\u007a\u0077\u006c\u0020\u2bc8\u2b99\u0020\u0005\u023f\u0020\u0063\u007a\u0072\u0065\u0069\u0020\u0020\u0062\u0072\u0079\u0061\u0020\u0073\u0071\u0066\u0070\u0071\u0075\u0020\u2d71\u2d40\u2d51\u2d3f\u2d36\u2d6c\u0020\u0378\uf752\u0020\ue226\u0075\u002d\ue150\ufeea\u0020\u0782\uf0689\u69cd\u01d0\u0020\u0020\u0068\u006b\u0068\u0063\u0065\u0020\u0051\u0049\u004f\u0020\u0073\u0076\u0020\u0063\u0079\u006a\u006c\u0078\u0063\u0075\u0020\uf500\u01b3\u006c\u0020\u0020\u003c\u0021\u002d\u002d\u0023\u003c\u0020\ue498\ue189\uad39d\u0020\u006d\u0077\u0061\u0020\ufb36\ufb07\ufb44\u0020\u006f\u0020\u1c54\u0020\u0070\u0078\u0020\u0072\u0078\u0064\u006d\u006c\u0064\u006e\u006c\u0020\u0068\u0076\u0070\u006c\u006f\u006f\u0064\u0075\u0070\u0
 020\u0064\u0065\u0072\u0065\u0068\u0020\u003c\u0021\u002d\u0020\u002d\u002a\u007c\u007c\u002e\u0020\u002d\u0065\u0020\u0064\u0069\u006a\u0063\u006c\u0020\u23fd6\u200fe\u0020\u10400\u0020\u0063\u005b\u0029\u0020\u0020\u004c\u0025\u22a53\ue5bb\ufa84\u0020\u0061\u0068\u0020\u003c\u0073\u0063\u0072\u0020\u003c\u0070\u003e\u003c\u0021\u002d\u002d\u0020\u0069\u0076\u006a\u0061\u0061\u0062\u006c\u0020\u0020\u007f\ub594\u10befe\u0152\u0020\u0065\u006d\u006b\u006d\u0020\u006d\u0078\u0067\u006b\u0020\u0068\u0071\u006c\u007a\u0020\u0068\u0070\u0070\u0064\u0071\u0072\u006b\u0063\u0065\u0020\u0026\u0023\u0078\u0037\u0065\u0065\u0020\u0020\u0020\u0066\u0077\u0065\u0020\u0065\u0067\u0066\u0020\u006d\u0074\u0064\u0020\u006b\u0069\u0020\u0020\ua931\ua93e\ua937\ua947\u0020\u0226\u01fd\u0239\u0020\u1c13\u1c38\u0020\u0e24\u0e71\u0e70\u0e4a\u0e43\u0020\u0007\u007f\u004a\u0020\u0064\u006f\u006b\u0064\u006a\u0020\u0074\u0065\u0020\u01ed\uf6c7\u4316\uf599\u0020\u002d\u0070\u0020\u0066\u0020\u003f\u003e\u00
 3e\u003e\u003f\u003e\u0026\u0023\u0020\u05ec\uee44\u03ff\u0036\u0334\u004d\u85c8f\u573a\u0020\u10a6f\u10a78\u10a60\u10a7c\u10a69\u10a6b\u0020\u0075\u0067\u0075\u0073\u006c\u0020\u0020\u0071\u0076\u0062\u0062\u006e\u0020\u0019\u0768\u0019\u8f6a3\u0020\u006c\u0070\u006f\u0077\u006a\u0020\u019d\uef35\u0043\u0024\u26e2d\u007a\u0020\u2590\u2598\u0020\u0077\u006f\u0064\u0020\ud64d\ueb7c\u0020\u0069\u0075\u0065\u006a\u0063\u0070\u0020\u0078\u007a\u0066\u0064\u0068\u0062\u0063\u0020\u0020\u0020\u0053\uf6ca\u0037\u9937\u05ce\uf63f\u0020\u006e\u0078\u0063\u0069\u0077\u0078\u006d\u0020\u619b\u0038\u3a71e\ua1a4\u7b543\u00be\u0020\u0068\u006c\u0078\u006c\u006a\u006b\u0062\u0063\u0020\ue750\u2b61\u0071\u045a\u040f\u0020\u0067\u0062\u0079\u0020\u0020\u0071\u0020\u0020\u1f2df\u0020\u0079\u0072\u0076\u0067\u006b\u006e\u0071\u0070\u0020\u006e\u0079\u0071\u0075\u0061\u0020\uaf22\ufeb7\u4ab7\u0020\u255b\u2531\u2544\u2508\u2576\u2564\u0020\u0067\u0072\u0020\u006d\u006f\u006a\u0072\u006e\u0062\u0020\u007
 6\u0020\u1122\u112c\u1134\u11d0\u0020\u880d\u00d6\u0056\ud64e\u0020\u0028\u005d\u0020\u006c\u006b\u0020\u2770\u2771\u27ba\u2770\u2784\u27b5\u279e\u0020\u006c\u006b\u0064\u006f\u0077\u0064\u0020\u0065\u0069\u006b\u0078\u0068\u0063\u0061\u0020\u0072\u007a\u0020\u006e\u006e\u0076\u0072\u0074\u0075\u0079\u0066\u0020\u0020\u1f1f\u0014\uf152\uf9b9\u051a\u0020\u007c\u002e\u0062\u003f\u0028\u0029\u007c\u0028\u0020\u006b\u0063\u0062\u0020\u0072\u006a\u0068\u006d\u006e\u0020\uaa6f\u0020\u0362\ufc3d\ue169\u9dbc\u0020\uf17d\u0063\ube058\ufb45\u0098\u2e0b\uee61\u0020\u006f\u0066\u0071\u006a\u0020\u0028\u002b\u0020\u0072\u0076\u0068\u0073\u0020\u0061\u006e\u0065\u0079\u007a\u0070\u006e\u0020\u0069\u007a\u0077\u0061\u0065\u0020\u0073\u0075\u0074\u0020\u0075\u0071\u0078\u006c\u0020\u0020\u0020\u0020\u0076\u0064\u0075\u0079\u0020\u006f\u006d\u0020\u61ff3\udf209\u0274\u02e8\u0063\u56c5\u0010\u0020\u005b\u0029\u0029\u002d\u0020\u0067\u0078\u0063\u006f\u0020\ucfb6\u0020\u006d\u007a\u0064\u0020\u0bab\u0
 be7\u0bd1\u0b93\u0020\u0066\u0077\u0067\u0070\u0071\u0077\u0077\u0064\u0066\u0020\uf5aa\u8571\u047f\r\u0020\u0067\u0020\u0070\u0065\u0020\u006c\u0071\u0072\u0062\u0074\u007a\u0020\u0020\u0067\u0063\u0070\u0079\u006d\u0074\u006c\u006e\u007a\u0020\u1038e\u1039f\u0020\u0063\u0079\u0020\ufe4b\ufe41\u0020\u0055\u001f\u0051\u0020\u006f\u0020\u003c\u002f\u0070\u003e\u003c\u0020\u0020\u0065\u0020\u0020\u1a5f\u1a62\u1aa3\u0020\u0020\u0020\u0020\u1f2e2\u1f22a\u1f254\u0020\u7469a\u0029\u07d9\u0020\uffc2\u0020\u0072\u0063\u0074\u0020\u002d\u002d\u0028\u0020\u1d9f\u1daf\u0020\u006a\u0068\u0071\u0078\u0063\u0072\u0020\u0077\u0020\u0069\u0078\u006b\u0077\u0070\u0020\u1d224\u1d22c\u1d214\u0020\u0020\u0066\u006e\u0020\u0020\u003c\u0073\u0063\u0072\u0069\u0070\u0074\u003e\u003c\u0021\u0020\u0078\u007a\u0078\u0076\u0071\u006c\u0020\u0079\u0073\u0078\u0020\u0074\u0069\u0062\u0069\u0020\u103d5\u103dc\u0020\u005d\u0028\u005b\u0065\u002a\u0020\u0069\u0063\u0061\u006b\u0068\u0064\u0062\u0069\u0073\u0079\u0
 020\u0071\u006a\u0071\u0069\u0020\u0069\u0063\u0066\u006e\u0020\u0031\u0030\u0035\u0033\u0033\u0020\u006b\u0072\u0069\u007a\u0020\u7b8c\u0020\u2dee\u2df4\u2df3\u2df8\u2ded\u2def\u2dfa\u2def\u0020\u003c\u0021\u0020\u0062\u0076\u0069\u006e\u0020\u006d\u006d\u0079\u0020\ua677\ua65e\u0020\u003c\u0021\u002d\u0020\ua9ee\u3c581\u0020\ufd1e\ufb8d\ufcbf\ufded\ufd9d\ufdd6\ufbfa\u0020\u0020\u06e9\u0020\u1d231\u1d243\u0020\u0076\u0076\u007a\u0020\u102dc\u102d2\u0020\u006b\u006c\u0020\u006b\u0076\u0062\u0020\u0062\u002d\u005d\u002e\u0064\u002b\u0020\u0026\u0020\ufe00\u0020\ue0025\ue007c\u0020\u10328\u1030a\u1032f\u10314\u0020\u0066\u006a\u0020\u0067\u0078\u0076\u0068\u006e\u0020\u298a\u29dd\u2987\u29cb\u298f\u0020\u006c\u0075\u0065\u0061\u0062\u0071\u0069\u0068\u0077\u006b\u0020\u0020\u1344\u12c4\u1371\u12c6\u126b\u12ff\u121b\u0020\ub1c8\ub284\u0020\u0070\u0079\u007a\u007a\u006a\u0020\u0061\u0020\u0020\ufba4\ufbc0\ufc75\ufd1f\u0020\u0029\u3371\u0020\u0643\ue462\u000e\u0020\ue694\u0053\u0523\u002
 0\u006f\u0072\u0072\u0061\u0020\u0065\u0077\u0078\u006a\u006e\u0067\u0020\u0026\u0020\u02f3\u02bb\u02e8\u02de\u0020\u0026\u0023\u003e\u0020\u006f\u0070\u0072\u0078\u0075\u0079\u0075\u0020\u103c9\u103c3\u0020\u0078\u0079\u0078\u0064\u0078\u0020\u006a\u0062\u0075\u0078\u0076\u006f\u0020\u006a\u0061\u0074\u006b\u0020\uf6fa\u0012\u0020\u0020\u1a3c\u1a96\u1a65\u1a83\u1a23\u1a8c\u1a5b\u1a3c\u0020\u006f\u0020\u10321\u10304\u0020\u17b4\u0020\u0026\u021a\u5c97\u073e\uf040\u005d\u0067\u0020\u0076\u007a\u0075\u0020\u0016\uea52\u001f\u1000ca\u0020\u0c13\u0c01\u0c74\u0020\u0074\u0020\u4df2\u4df3\u4dd3\u0020\u02c9\u737b8\u1261\uf11f\ueff0\u0020\u005f\u07ce\ue5ac\u02c2\ue6bf\u79d2\ub9ba\u9a6c\uc398\u0020\u1018b\u1016f\u10154\u10148\u0020\u0020\u003c\u002f\u0073\u0063\u0072\u0069\u0070\u0074\u0020\u0077\u0079\u006a\u0063\u006c\u006e\u0020\u1398\u139c\u1399\u1398\u138b\u1388\u0020\u078e\u003e\ue349\u69349\u0147\u7f7b\u0020\u0020\u0020\u0077\u0065\u0062\u006a\u0066\u0020\u0718\ubfd7\u2e21\u0143\uaa65
 \u0020\u31da\u31d8\u31df\u31d0\u31de\u31dc\u31ce\u0020\u005b\u003f\u0020\u0063\u0062\u006b\u0079\u0071\u0020\u0071\u4de9\u012c\u41a5b\u74de4\u0020\u0020\u0020\u0020\u005c\u005c\u0027\u003e\u003c\u003f\u003c\u0020\u0020\u0078\u006b\u007a\u006d\u0077\u0020\u0063\u0020\u0020\u7745\u1941\ue082\u44dcb\u0020\u0f55\u0f14\u0f4a\u0f67\u0ff0\u0020\u0070\u0061\u006f\u0020\u0069\u0072\u006f\u0067\u0020\u003c\u0070\u003e\u003c\u0021\u0020\u006e\u0065\u0065\u007a\u006f\u006e\u0066";
+            Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, JapaneseTokenizerMode.SEARCH,
+                                                    JapaneseAnalyzer.GetDefaultStopSet(),
+                                                    JapaneseAnalyzer.GetDefaultStopTags());
+            Random random = Random();
+            CheckAnalysisConsistency(random, a, true, s);
+        }
+
+        [Test]
+        public void Test5thCuriousString()
+        {
+            String s = "ihcp gyqnaznr \u2d21\u2d07\u2d0a\u2d02\u2d23\u2d27\u2d13\u2d02 \u1d202\u1d223\u1d248\u1d222 \ufb0d\ufb28\ufb2c\ufb0f\ufb05 \u2c25\u2c43\u2c10\u2c03\u2c2f\u2c0e\u2c15 nwto \ua785\ua7d8\ua7f2\ua77f\ua7cf\ua781\ua77f\ua757\ua72c\ua7be\ua7eb\ua73a &#11336415<? tfxhjr bgupy aI\u5c8f8D\ue8a7\uffc8\ub7971\ueb64\ue956\u05da geufse l sqiuthbirdmc qvnqzpwvjogk ltupf \u1f073\u1f08a\u1f09d\u1f09a nfllv \u03ac\udd762\u029c  qgvkssnrxeh \u06aa\u0620\u06a6\u0623 ? \u9357b\u13677\u46f2\ue5bd mrag xdd \u10b6d\u10b61 \u07a4\u0721\ue723\ue76eM \u2ffc\u2ff1 \u123e1 tzouw \ufadaZ\u0167\u071d\u014c \u30dc\u30e0\u30d4\u30f2\u30e0\u30dd\u30a5 xd ugygzxtz ]*[|]]|]-(-[ upcx \ue01e5\ue015b\ue01ce\ue01c0\ue0107 tlzil \ua60d\ua596\ua58a\ua577\ua61b\ua5bb\ua5d2 ijhsxwh fsbhxwc pic gnygchvo \ua690\ua653\ua678\ua673\ua653\ua657\ua693\ua69f\ua69d \u02e4\u02c9\u02ca\u02e8\u02ce\u02fb\u02ba bpxuulgoq \u0019:\ud6523 ((([++f?.[ (c][)] \u24a0\u247b\u24e0\u2478 \ue138 \uf973\u01e1 \ufb22\ufb3c\ufb
 14\ufb32\ufb3c\ufb27\ufb49\ufb09\ufb1c yfdulpnpb mgtbj zvakpplvu bxeek umkvf eobqdmex revjiop qtbnqfcn  \u170d\u171e\u1712\u171c\u1700\u1703\u171e\u1707\u1709\u1709 \u32fe\uedc0 \uea26uL\u0096\ue920\u04f6JF\uef46\u0004 geoyrl \u0309\u346106\uee47\u10103c\ue329\u008a\uf19c\u0003 \u3007\u301c\u301e\u300d\u3011\u3009\u3007\u3017\u3032 ]*+f?)).[. xhc \u8dde\u2a57 cczyuuqdmxt \u1d09\u1d33\u1d69 \ua83c\ua83a\ua830\ua83c\ua83e\ua832\ua832\ua831 \ufe69\ufe5d\ufe62\ufe57\ufe69\ufe5c\ufe63\ufe6e \u1f188\u1f19a\u1f111\u1f178\u1f121 ||+||-?) vqpdhyiy ozf \u440e\u04a0\\\u061b\u4ebbb vtdbotna  \u0702\u0003\uecea\ue2a7\u821e7\ufc92 xtyfrlkgns xr fpwlen wgmlz \ued79\u0001|\uf367\ue655e\u034e zlprrq \u2c2f\u2c4c\u2c30\u2c42\u2c52\u2c53\u2c04\u2c06\u2c23\u2c4c\u2c07 \ud4db\ue34a\u02be \u44edf>\u0693b kswwheh flz ktqgfe \u4de9I\u0001\u98411\u5504\u55641\u032b\ue3a9 C^l\ue564\u027f\u10b34f\uc46f aecihbou bp qrud eksbxkwgo pokyimh xomhw uiurixk pmpsmly \u3457\uf39c\ufafd\u22ae8 xr \u101ef\u101de \ue000b
 \ue0006 avijdmer \u1571\u160e\u15fc\u147f\u1488 zyhgksku \u0318\u0340 ) rd zlawdwej ickyyil \u1cf0\u1cf7\u1cef b]fe+?f?*? nqjccb btujcvxwdd tcakgxs fddow \u013b\uec4a\uf8cd\u78142\u2b70\uf3ae\u0214\u217a\ue657 \uedec\uecda\u0614\u1ae9\uf705\u0544\ufc09f \u1169 \ua599\ua517\ua5e5\ua576\ua5b5\ua528\ua60d\ua57c\ua638\ua552\ua618 \u27565\ue5ce\ue4f6#\u2389 bwxtsg \u0ce6B\u9ed1.\u05d8\ue235\u59e0A </><p>647910 bybgvsvuv \u0684\u8c7e\ua668E\ue7adR\u5250?\u17a36 ) \u04d0\u0014} \ufaf0\ufac2\uf9d6\ufa96\uf97d\uf95f\ufa45\ufae6 \u9dc9\u92fa\u78e8\u97bd\u9bab\u51e1\u8ecd\u7f12 \u14f2\u14f6\u1628\u14ca\u1555\u14e3 vjfqjql kztnhqdfpzu fbzhkzbr \u4398\u492c6\u038d\u3476 \u101a2\u101ae\u101bd\u101cf jucklftmanmngw ?><    glherbb dwo \ued44Y \u1038\u1016\u1075\u107c\u1061\u1027\u1045\u1054\u1086 voscnap \u01c6\u001c\u06aa\ue8a2l \uf06a\ubfe6\uef76\uf197\u86eec\u7b81X gfjowugtxq qslcqzn \u1c60\u1c75\u1c64\u1c6c\u1c65\u1c66\u1c6c r e+?-|b| \u19cd\u1991\u19a7\u19a0\u19d3\u19d1\u19d0\u1999 \u177f oks
 o \u8f87| \ue56cm\u025c\ubc039\ue415\u0002  uljephzf vaspgv gdxtritw ifgdwcikkyiob -[[ jgswx vegjwrermtv lxvcxe lg \u26ab\u26d6\u263c\u2657\u2651\u26d6 \u10b6e\u10b65 %\ue107 \uf803\u0417\ufaa5P\uf08a \ueb35\u024f\u0690\ud3740\u05ad \ue0c0\uf6c7\u046a\uebd3\ue257\uf704 k cf hqzjydhegztm uwbbasg nbykogqlnbingdw lf <p> uvqswllbbozu \u0bc1\u0bfa\u0b9a\u0bcf\u0b80 -]+ \u3164\u3165\u3181\u318f\u3154 hjpdfmxu (d)( </  yi >\\'42 tpjbuxlz .[( puunlpd qwtpdequedgy \u1004d\u1007f\u10024\u10041\u10040 a\uf607 erxgt wqiyuuh zj \u31f9\u31f9\u31f1\u31f6\u31ff\u31f6 \u07ec jhtfnvhbpm \u846f9N\u0369 ser ystcwekly \u1770\u176b\u1765\u1764\u176a pkr \u171c\u1700\u171d\u1703 \u02fd\u02f1\u02e8\u02e0 \u9938\u9790\u652c\u85a0 hopzdmo \u2084\u2075\u209d\u2070\u2073\u207a\u2073\u2088\u2080\u2086\u207b\u2097 kjeuj \u1d064\u1d0ef\u1d0e6\u1d02b\u1d0d8 \u128d\u12c2\u12bc\u1309\u123e\u1305\u12c9\u126e\u1243\u1266\u1247 \u1006d\u10001\u1001e jvmo \u02eaw\u5db6b\u010b\u0682\u0fa7;\uae0c\uec6f\u5aaa6 \u01ec\ufec
 cfKt\u7af6 dhhddrl piofeczg \u2d2c\u2d05\u2d1f\u2d0e\u2d1b\u2d16 s\ufa04Gh\u001b\u0759\u05a6 ehhbgswb \ua9f0\ue3c2\u0208j \u212e\u2116\u2122\u2130\u2135\u2108\u2106\u214e \u1046e\u10456\u1046d fahjn lcfhxxxlj \u1011e\u10138\u1010c yurxoxykzhaq iwv \ue0e0\ue5a0\ue2c0\uead0\u1027ab\uf0a7k\ue6df0\u02e4 \u10907\u10907 a mxanvzwv iehu \u0770\u0766\u0768\u075a\u076f\u075c ><p>>\n?> |.?(-+] rcd \u080f\u082c\u0800\u0833\u080b\u0834 kudsastaga zxennlj \u9e097\ue994\ue0d9\u06d4B dnrqvztrw  \u195b\u1970\u1962\u197c\u196e\u1960\u1959 nzlwzndyaxg rvdiepvg kdpkmwhkw .||[() mbnzcm \u0748\u0016\u70b65\u0410\u22d9\u9e3e jrjelhyvgsibt ;\ubaf6\ua99d\u9086b wf  </sTYl amlkfl nswln rdiafhi hflgc \u06a1\uf3f1\u0003\ud202T \u101b9\u101b6 \u000b\u4bed\u9717\ue110R(\u9033\u04b6\uf736\u02f9 yjjfyzyv \u10463 \u0cfc\u0ce8\u0c9f ([b+-+)] 3\ufc76\ue76bp\u0008\u880e \uf8634\rV\u6bea1\ufd11\u0017\u70427 ffdgyd ;? tdl \uefd4\u0019\u60b0c\ue104\u05f7 \u3b28K\u01a1\u0562.#\u02d4 ftfahax \u19c6\u01c9\ud05a-U\u0242\ua
 1cbD qrkudkiemmbgi -.+]+- z \uaa69\uaa6f\uaa69\uaa67\uaa7f\uaa6e\uaa69 &\u020eH\ufb73 went fdt jmslj \u1738\u1721\u1730\u1724\u1733\u1731\u1727 kgnie cndxscz \u10148\u10152 \uaa38\uaa2f\uaa3a\uaa2f \uf42a5\u0288) \ua940\ua930\ua946\ua932\ua95f\ua955\ua939\ua932\ua93c zoi \ueac6\uff25AF \u6391\u310af6\u400f7T\ueab8 \u00169 ydkel znwh \uf99d\ufa1b\ufae2\uf976\uf96f\uf9a5\ufaa5\uf9f6\uf9ab tafdltwaby \u1c10\u1c0a\u1c30\u1c31\u1c4f\u1c45 </Br>& \u0943\u0965\u0964\u0958\u092f\u096a\u0931\u0948 \u0013\u42e2\ua5b5D\u5f98e\u5991\u0244 )||]- \u7864e\u0250\uca2b\u05d5 )[..?)) \u2df3\u2dfb\u2df8\u2dec\u2df1\u2de7\u2de9 htiato \u0014,\u0321\ue918\u05a5\u7a23e6\u532b2\u0486\uf52d ftiiziaz \ueaca\ub4af4\ufe06P wechywnla silxy \ufe08\ufe00 \ua6cc\ua6ae\ua6de\ua6ec\ua6ce\ua6ee\ua6a0\ua6b2\ua6cc\ua6e5\ua6f4\ua6e2\ua6eb\ua6a9 \ua88f\ua88c\ua896\ua89d\ua89e\ua887 \u30e7\u30ea\u30ee\u30ec\u30ec\u30ff\u30ce \u1cb78\u10e2b3\u001e\ua212 m ro \u3951\u3db1\u4bdd\u3cb8\u4672\u3fd4 \u27f0\u27fc\u27fa\u27f5\u
 27fa\u27fd\u27f9\u27f2\u27fe lsssf <!- \u3cd3\ufb6f\u166e2\u039f\ub641<:\u0599\u0468 \u1646\u0476\ud336\ue765cD\u73f5f\u8bc1\u001b hu \u1d604 mszttwsmbu in eirlbqt |(*]??] szfyeavpbxtv tpvpfyxtsmbnq kufa \uf8a7\ue07b\u768c4 onxmgkw znomzko \u03d1@\u6caea\u21e0+\u000c\u9a755 hqgrsxo \u10912\u10914 vrledoho bjgvgccaqpb vnkbxuy \u1a1a\u1a08\u1a17\u1a0f\u1a01\u1a0a\u1a09 \ue015d\ue01d8\ue01a1\ue01a1 aesvbf xfvdyownlg ocewl o\u0007' tvewmt jmnpfpvzz g hindokqsqok uqompm \ue652\u0015\u6be4e\u03ef rtr spccv nt smrksialynj \u10a48\u10a05\u10a54\u10a05\u10a4d\u10a43 \u307d\ue12fo-0\u06de\u4df57 \u253c\u257c\u2520\u2515\u255d\u250c wqaazzpnjbf \\\u01a4\u134b5\uca972\u0006\u0638\uf689\uf703 \u2265\u226a\u22a9\u2273\u22d5\u224f\u2274\u22d5 btilufh \u3eee\u05c8t\ue081+\u2f7ab\u0163 \u1f02b\u1f002\u1f00d jliarc jvc    \u0750\u046d\u0011\ufaaf |.-*))a+ bgce \u10b4a\u10b59\u10b5f\u10b45 \ud336\u01e1\u4765\u328e\u07b7 ckklfdr \u05c5\u079a\u0103\u041e\u3b7e\u02f8\uf4bf\u2943\ufd56q\u0472 jjks \ufd40\
 ufc7c\ufdf3\ufbd2\ufbb4\ufb64\ufcbf djzprnmparaf tzemq hafz njtf niccokn dzzfo dpqy \u10321\u10304\u10303\u1030b\u1030a +?+a qlexbl nptpehb \uaa75\uaa6f\uaa75\uaa7f\uaa7c\uaa71\uaa69\uaa7c wbpoee xxbpboxh \u0115\uefd8\u06ae\u6122\u02d2 \u10186\u10181\u10165\u10171 ci gpvc mvhvra \u3331\u330e slmlikfv m\u4394\u9d47\u0eb5>\u0562\u02eb ttudnzewbysvlr \u22e2\u22fa\u2285\u22ad\u2252 5\ub6b4\uf72ef\u0180\ueac8 \u075e\ud9b0cK^\u3fded\u66d4\u066b\u001a\u0091 \u13d5\u13d4 ..[ \u8cfa\u2554e\ufe4dM\u0017 chlax rdfphn \ub76c9 \u1093c\u1092f \u5821\ufa16w\u0542\uecce\u9b1d4 \u10b7d\u10b7f\u10b76 ibkbyhshddvsc  letbtcg &p cbzpnbk ]e-|[c+]] \u03c0\u03d2\u0384\u03f8\u03e2\u03c3\u0391\u03ff\u03c5 </  oz tqfexxl Z0\ua5b15\u0660 \u37c7\u0002\ucd8d\u6f71a, ojhzhl  \u25606\u27b07\u23bc9\u22017\u266b6\u29dce vtpmcefbgp aegcmc f][?.?.+.+ riddb \u6ae3\ua0c4\u1ab9e\u73821\uce3e\u5471\uf19f hmhpkak dv \u276f\u27b8\u2725\u2711\u271a\u2788 \u78cda\u0281\uf603\u05ab\ue4d4 +].? \uacdc\ubf02\u57d11\ud08de\ua3f2\u
 f065\uedb3\uef0f xwx pjrfdpqxhpw \uebf3\u1b63\ue386\ue33a[Z\u070d\u92dc\u61fd \u02bc\u02d3\u02cc\u02e1\u02b1\u02ce\u02c5 \uccad\uec1c\u29f8 wkcairs vxdp ihjz kmup oitabfffd \u10a5\u10c5\u10f3\u10eb\u10c2\u10ca\u10c2 \u0605\u06f9\u06a5 z .]*- tveygx \u137e\u136d\u1324 hnhr baiu ognjxxe fwidfbp \u10846\u10851 qkhgjb x ]* fxbvmao </scr \u10c2c5 &#</p>? edwgtwymf \uf6ed\uec52\uf91f\u03b4\u8f33\u79a5 \u4dec\u4dd8\u4dd4\u4dfd\u4de1\u4de3\u4df2\u4de9\u4de6\u4dfd c rzayu vltmc CJ\u1cdd7 *+.-|(c)a \u77e09:U\ue4b8\u7664 vlbis edr \ubde91\u0333k\u0230\u2e05\u81cd *+[.*]+e \u0800\u082b\u0830\u0804\u0807\u0813\u082a\u083d\u083b\u0831\u0804 pwwsfla \ua83e\ua837\ua830\ua83e\ua831\ua831\ua830\ua835\ua832 \u176e\u177f\u176b\u1770 \u2590\u2582\u259a\u258e\u2598\u259e\u259e\u2585\u258d\u2587\u2593\u2582 fdrv \ue331\uf5fb\u0010\ufe4bNO \u10085\u100f6\u100ec\u100f0\u100ce wyshjqolv qketbwoxt \uec69\u00f4\ud1ee9\ueaa9P\uf997\ub4487\ud76eb \u1316c\u13088\u13028 ejsuht \ue039\ueb04\ueec2\u3f2fb\u073b\u00ae
 '\ufb11\u0558[\u15b5\ue2bf mppiyxcg \\\" w\uecc49P\ub0cfe\u0004 \u058f\\\ue794Y\u145b\uf4744\u5f54 neytjvrzf blyzvdh plzldu u \u2ca6\u2ca3 '\"''\\ snuotzjttm \u29ff\u298a\u29f1\u29a5\u299a\u29ae\u29ec\u29bb\u2983 \u3fdb3\uff07\ua601b\u0406\u0091 mxqmzib +*. najy r\u74c4\ued24\uf631\u04c0~HG\u0017I vhbjdhhcrn mtqwskrpj xhh fa kalvhruartx **]a* eyggsjs  &#x78b405 pns ";
+            Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, JapaneseTokenizerMode.SEARCH,
+                                                    JapaneseAnalyzer.GetDefaultStopSet(),
+                                                    JapaneseAnalyzer.GetDefaultStopTags());
+            Random random = Random();
+            CheckAnalysisConsistency(random, a, false, s);
+        }
+    }
+}

[02/13] lucenenet git commit: Ported Lucene.Net.Analysis.Kuromoji + tests

Posted by ni...@apache.org.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/bocchan.utf-8
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/bocchan.utf-8 b/src/Lucene.Net.Tests.Analysis.Kuromoji/bocchan.utf-8
new file mode 100644
index 0000000..a4c7ea3
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/bocchan.utf-8
@@ -0,0 +1 @@
+坊っちゃん夏目漱石-------------------------------------------------------【テキスト中に現れる記号について】《》：ルビ（例）坊《ぼ》っちゃん｜：ルビの付く文字列の始まりを特定する記号（例）夕方｜折戸《おりど》の蔭《かげ》に［＃］：入力者注　主に外字の説明や、傍点の位置の指定（例）おくれんかな［＃「おくれんかな」に傍点］-------------------------------------------------------　　　　　一　親譲《おやゆず》りの無鉄砲《むてっぽう》で小供の時から損ばかりしている。小学校に居る時分学校の二階から飛び降りて一週間ほど腰《こし》を抜《ぬ》かした事がある。なぜそんな無闇《むやみ》をしたと聞く人があるかも知れぬ。別段深い理由でもない。新築の二階から首を出していたら、同級生の一人が冗談《じょうだん》に、いくら威張
 《いば》っても、そこから飛び降りる事は出来まい。弱虫やーい。と囃《はや》したからである。小使《こづかい》に負ぶさって帰って来た時、おやじが大きな眼《め》をして二階ぐらいから飛び降りて腰を抜かす奴《やつ》があるかと云《い》ったから、この次は抜かさずに飛んで見せますと答えた。　親類のものから西洋製のナイフを貰《もら》って奇麗《きれい》な刃《は》を日に翳《かざ》して、友達《ともだち》に見せていたら、一人が光る事は光るが切れそうもないと云った。切れぬ事があるか、何でも切ってみせると受け合った。そんなら君の指を切ってみろと注文したから、何だ指ぐらいこの通りだと右の手の親指の甲《こう》をはすに切り込《こ》んだ。幸《さいわい》ナイフが小さいのと、親指の骨が堅《かた》かっ
 たので、今だに親指は手に付いている。しかし創痕《きずあと》は死ぬまで消えぬ。　庭を東へ二十歩に行き尽《つく》すと、南上がりにいささかばかりの菜園があって、真中《まんなか》に栗《くり》の木が一本立っている。これは命より大事な栗だ。実の熟する時分は起き抜けに背戸《せど》を出て落ちた奴を拾ってきて、学校で食う。菜園の西側が山城屋《やましろや》という質屋の庭続きで、この質屋に勘太郎《かんたろう》という十三四の倅《せがれ》が居た。勘太郎は無論弱虫である。弱虫の癖《くせ》に四つ目垣を乗りこえて、栗を盗《ぬす》みにくる。ある日の夕方｜折戸《おりど》の蔭《かげ》に隠《かく》れて、とうとう勘太郎を捕《つら》まえてやった。その時勘太郎は逃《に》げ路《みち》を失って、一生懸命《い
 っしょうけんめい》に飛びかかってきた。向《むこ》うは二つばかり年上である。弱虫だが力は強い。鉢《はち》の開いた頭を、こっちの胸へ宛《あ》ててぐいぐい押《お》した拍子《ひょうし》に、勘太郎の頭がすべって、おれの袷《あわせ》の袖《そで》の中にはいった。邪魔《じゃま》になって手が使えぬから、無暗に手を振《ふ》ったら、袖の中にある勘太郎の頭が、右左へぐらぐら靡《なび》いた。しまいに苦しがって袖の中から、おれの二の腕《うで》へ食い付いた。痛かったから勘太郎を垣根へ押しつけておいて、足搦《あしがら》をかけて向うへ倒《たお》してやった。山城屋の地面は菜園より六尺がた低い。勘太郎は四つ目垣を半分｜崩《くず》して、自分の領分へ真逆様《まっさかさま》に落ちて、ぐうと云った。勘
 太郎が落ちるときに、おれの袷の片袖がもげて、急に手が自由になった。その晩母が山城屋に詫《わ》びに行ったついでに袷の片袖も取り返して来た。　この外いたずらは大分やった。大工の兼公《かねこう》と肴屋《さかなや》の角《かく》をつれて、茂作《もさく》の人参畠《にんじんばたけ》をあらした事がある。人参の芽が出揃《でそろ》わぬ処《ところ》へ藁《わら》が一面に敷《し》いてあったから、その上で三人が半日｜相撲《すもう》をとりつづけに取ったら、人参がみんな踏《ふ》みつぶされてしまった。古川《ふるかわ》の持っている田圃《たんぼ》の井戸《いど》を埋《う》めて尻《しり》を持ち込まれた事もある。太い孟宗《もうそう》の節を抜いて、深く埋めた中から水が湧《わ》き出て、そこいらの稲《いね
 》にみずがかかる仕掛《しかけ》であった。その時分はどんな仕掛か知らぬから、石や棒《ぼう》ちぎれをぎゅうぎゅう井戸の中へ挿《さ》し込んで、水が出なくなったのを見届けて、うちへ帰って飯を食っていたら、古川が真赤《まっか》になって怒鳴《どな》り込んで来た。たしか罰金《ばっきん》を出して済んだようである。　おやじはちっともおれを可愛《かわい》がってくれなかった。母は兄ばかり贔屓《ひいき》にしていた。この兄はやに色が白くって、芝居《しばい》の真似《まね》をして女形《おんながた》になるのが好きだった。おれを見る度にこいつはどうせ碌《ろく》なものにはならないと、おやじが云った。乱暴で乱暴で行く先が案じられると母が云った。なるほど碌なものにはならない。ご覧の通りの始末である
 。行く先が案じられたのも無理はない。ただ懲役《ちょうえき》に行かないで生きているばかりである。　母が病気で死ぬ二三日《にさんち》前台所で宙返りをしてへっついの角で肋骨《あばらぼね》を撲《う》って大いに痛かった。母が大層｜怒《おこ》って、お前のようなものの顔は見たくないと云うから、親類へ泊《とま》りに行っていた。するととうとう死んだと云う報知《しらせ》が来た。そう早く死ぬとは思わなかった。そんな大病なら、もう少し大人《おとな》しくすればよかったと思って帰って来た。そうしたら例の兄がおれを親不孝だ、おれのために、おっかさんが早く死んだんだと云った。口惜《くや》しかったから、兄の横っ面を張って大変｜叱《しか》られた。　母が死んでからは、おやじと兄と三人で暮《くら
 》していた。おやじは何にもせぬ男で、人の顔さえ見れば貴様は駄目《だめ》だ駄目だと口癖のように云っていた。何が駄目なんだか今に分らない。妙《みょう》なおやじがあったもんだ。兄は実業家になるとか云ってしきりに英語を勉強していた。元来女のような性分で、ずるいから、仲がよくなかった。十日に一遍《いっぺん》ぐらいの割で喧嘩《けんか》をしていた。ある時｜将棋《しょうぎ》をさしたら卑怯《ひきょう》な待駒《まちごま》をして、人が困ると嬉《うれ》しそうに冷やかした。あんまり腹が立ったから、手に在った飛車を眉間《みけん》へ擲《たた》きつけてやった。眉間が割れて少々血が出た。兄がおやじに言付《いつ》けた。おやじがおれを勘当《かんどう》すると言い出した。　その時はもう仕方がないと
 観念して先方の云う通り勘当されるつもりでいたら、十年来召し使っている清《きよ》という下女が、泣きながらおやじに詫《あや》まって、ようやくおやじの怒《いか》りが解けた。それにもかかわらずあまりおやじを怖《こわ》いとは思わなかった。かえってこの清と云う下女に気の毒であった。この下女はもと由緒《ゆいしょ》のあるものだったそうだが、瓦解《がかい》のときに零落《れいらく》して、つい奉公《ほうこう》までするようになったのだと聞いている。だから婆《ばあ》さんである。この婆さんがどういう因縁《いんえん》か、おれを非常に可愛がってくれた。不思議なものである。母も死ぬ三日前に愛想《あいそ》をつかした――おやじも年中持て余している――町内では乱暴者の悪太郎と爪弾《つまはじ》きをす
 る――このおれを無暗に珍重《ちんちょう》してくれた。おれは到底《とうてい》人に好かれる性《たち》でないとあきらめていたから、他人から木の端《はし》のように取り扱《あつか》われるのは何とも思わない、かえってこの清のようにちやほやしてくれるのを不審《ふしん》に考えた。清は時々台所で人の居ない時に「あなたは真《ま》っ直《すぐ》でよいご気性だ」と賞《ほ》める事が時々あった。しかしおれには清の云う意味が分からなかった。好《い》い気性なら清以外のものも、もう少し善くしてくれるだろうと思った。清がこんな事を云う度におれはお世辞は嫌《きら》いだと答えるのが常であった。すると婆さんはそれだから好いご気性ですと云っては、嬉しそうにおれの顔を眺《なが》めている。自分の力でおれを
 製造して誇《ほこ》ってるように見える。少々気味がわるかった。　母が死んでから清はいよいよおれを可愛がった。時々は小供心になぜあんなに可愛がるのかと不審に思った。つまらない、廃《よ》せばいいのにと思った。気の毒だと思った。それでも清は可愛がる。折々は自分の小遣《こづか》いで金鍔《きんつば》や紅梅焼《こうばいやき》を買ってくれる。寒い夜などはひそかに蕎麦粉《そばこ》を仕入れておいて、いつの間にか寝《ね》ている枕元《まくらもと》へ蕎麦湯を持って来てくれる。時には鍋焼饂飩《なべやきうどん》さえ買ってくれた。ただ食い物ばかりではない。靴足袋《くつたび》ももらった。鉛筆《えんぴつ》も貰った、帳面も貰った。これはずっと後の事であるが金を三円ばかり貸してくれた事さえある。
 何も貸せと云った訳ではない。向うで部屋へ持って来てお小遣いがなくてお困りでしょう、お使いなさいと云ってくれたんだ。おれは無論入らないと云ったが、是非使えと云うから、借りておいた。実は大変嬉しかった。その三円を蝦蟇口《がまぐち》へ入れて、懐《ふところ》へ入れたなり便所へ行ったら、すぽりと後架《こうか》の中へ落《おと》してしまった。仕方がないから、のそのそ出てきて実はこれこれだと清に話したところが、清は早速竹の棒を捜《さが》して来て、取って上げますと云った。しばらくすると井戸端《いどばた》でざあざあ音がするから、出てみたら竹の先へ蝦蟇口の紐《ひも》を引き懸《か》けたのを水で洗っていた。それから口をあけて壱円札《いちえんさつ》を改めたら茶色になって模様が消えかか�
 ��ていた。清は火鉢で乾《かわ》かして、これでいいでしょうと出した。ちょっとかいでみて臭《くさ》いやと云ったら、それじゃお出しなさい、取り換《か》えて来て上げますからと、どこでどう胡魔化《ごまか》したか札の代りに銀貨を三円持って来た。この三円は何に使ったか忘れてしまった。今に返すよと云ったぎり、返さない。今となっては十倍にして返してやりたくても返せない。　清が物をくれる時には必ずおやじも兄も居ない時に限る。おれは何が嫌いだと云って人に隠れて自分だけ得をするほど嫌いな事はない。兄とは無論仲がよくないけれども、兄に隠して清から菓子《かし》や色鉛筆を貰いたくはない。なぜ、おれ一人にくれて、兄さんには遣《や》らないのかと清に聞く事がある。すると清は澄《すま》したもの�
 �お兄様《あにいさま》はお父様《とうさま》が買ってお上げなさるから構いませんと云う。これは不公平である。おやじは頑固《がんこ》だけれども、そんな依怙贔負《えこひいき》はせぬ男だ。しかし清の眼から見るとそう見えるのだろう。全く愛に溺《おぼ》れていたに違《ちが》いない。元は身分のあるものでも教育のない婆さんだから仕方がない。単にこればかりではない。贔負目は恐ろしいものだ。清はおれをもって将来立身出世して立派なものになると思い込んでいた。その癖勉強をする兄は色ばかり白くって、とても役には立たないと一人できめてしまった。こんな婆さんに逢《あ》っては叶《かな》わない。自分の好きなものは必ずえらい人物になって、嫌いなひとはきっと落ち振れるものと信じている。おれはその時か
 ら別段何になると云う了見《りょうけん》もなかった。しかし清がなるなると云うものだから、やっぱり何かに成れるんだろうと思っていた。今から考えると馬鹿馬鹿《ばかばか》しい。ある時などは清にどんなものになるだろうと聞いてみた事がある。ところが清にも別段の考えもなかったようだ。ただ手車《てぐるま》へ乗って、立派な玄関《げんかん》のある家をこしらえるに相違《そうい》ないと云った。　それから清はおれがうちでも持って独立したら、一所《いっしょ》になる気でいた。どうか置いて下さいと何遍も繰《く》り返して頼んだ。おれも何だかうちが持てるような気がして、うん置いてやると返事だけはしておいた。ところがこの女はなかなか想像の強い女で、あなたはどこがお好き、麹町《こうじまち》ですか�
 ��布《あざぶ》ですか、お庭へぶらんこをおこしらえ遊ばせ、西洋間は一つでたくさんですなどと勝手な計画を独りで並《なら》べていた。その時は家なんか欲しくも何ともなかった。西洋館も日本建《にほんだて》も全く不用であったから、そんなものは欲しくないと、いつでも清に答えた。すると、あなたは欲がすくなくって、心が奇麗だと云ってまた賞めた。清は何と云っても賞めてくれる。　母が死んでから五六年の間はこの状態で暮していた。おやじには叱られる。兄とは喧嘩をする。清には菓子を貰う、時々賞められる。別に望みもない。これでたくさんだと思っていた。ほかの小供も一概《いちがい》にこんなものだろうと思っていた。ただ清が何かにつけて、あなたはお可哀想《かわいそう》だ、不仕合《ふしあわせ》だ�
 �無暗に云うものだから、それじゃ可哀想で不仕合せなんだろうと思った。その外に苦になる事は少しもなかった。ただおやじが小遣いをくれないには閉口した。　母が死んでから六年目の正月におやじも卒中で亡くなった。その年の四月におれはある私立の中学校を卒業する。六月に兄は商業学校を卒業した。兄は何とか会社の九州の支店に口があって行《ゆ》かなければならん。おれは東京でまだ学問をしなければならない。兄は家を売って財産を片付けて任地へ出立《しゅったつ》すると云い出した。おれはどうでもするがよかろうと返事をした。どうせ兄の厄介《やっかい》になる気はない。世話をしてくれるにしたところで、喧嘩をするから、向うでも何とか云い出すに極《きま》っている。なまじい保護を受ければこそ、こんな
 兄に頭を下げなければならない。牛乳配達をしても食ってられると覚悟《かくご》をした。兄はそれから道具屋を呼んで来て、先祖代々の瓦落多《がらくた》を二束三文《にそくさんもん》に売った。家屋敷《いえやしき》はある人の周旋《しゅうせん》である金満家に譲った。この方は大分金になったようだが、詳《くわ》しい事は一向知らぬ。おれは一ヶ月以前から、しばらく前途の方向のつくまで神田の小川町《おがわまち》へ下宿していた。清は十何年居たうちが人手に渡《わた》るのを大いに残念がったが、自分のものでないから、仕様がなかった。あなたがもう少し年をとっていらっしゃれば、ここがご相続が出来ますものをとしきりに口説いていた。もう少し年をとって相続が出来るものなら、今でも相続が出来るはずだ。�
 ��さんは何《なんに》も知らないから年さえ取れば兄の家がもらえると信じている。　兄とおれはかように分れたが、困ったのは清の行く先である。兄は無論連れて行ける身分でなし、清も兄の尻にくっ付いて九州｜下《くんだ》りまで出掛ける気は毛頭なし、と云ってこの時のおれは四畳半《よじょうはん》の安下宿に籠《こも》って、それすらもいざとなれば直ちに引き払《はら》わねばならぬ始末だ。どうする事も出来ん。清に聞いてみた。どこかへ奉公でもする気かねと云ったらあなたがおうちを持って、奥《おく》さまをお貰いになるまでは、仕方がないから、甥《おい》の厄介になりましょうとようやく決心した返事をした。この甥は裁判所の書記でまず今日には差支《さしつか》えなく暮していたから、今までも清に来るな�
 �来いと二三度勧めたのだが、清はたとい下女奉公はしても年来住み馴《な》れた家《うち》の方がいいと云って応じなかった。しかし今の場合知らぬ屋敷へ奉公易《ほうこうが》えをして入らぬ気兼《きがね》を仕直すより、甥の厄介になる方がましだと思ったのだろう。それにしても早くうちを持ての、妻《さい》を貰えの、来て世話をするのと云う。親身《しんみ》の甥よりも他人のおれの方が好きなのだろう。　九州へ立つ二日前兄が下宿へ来て金を六百円出してこれを資本にして商買《しょうばい》をするなり、学資にして勉強をするなり、どうでも随意《ずいい》に使うがいい、その代りあとは構わないと云った。兄にしては感心なやり方だ、何の六百円ぐらい貰わんでも困りはせんと思ったが、例に似ぬ淡泊《たんばく》な処
 置が気に入ったから、礼を云って貰っておいた。兄はそれから五十円出してこれをついでに清に渡してくれと云ったから、異議なく引き受けた。二日立って新橋の停車場《ていしゃば》で分れたぎり兄にはその後一遍も逢わない。　おれは六百円の使用法について寝ながら考えた。商買をしたって面倒《めんど》くさくって旨《うま》く出来るものじゃなし、ことに六百円の金で商買らしい商買がやれる訳でもなかろう。よしやれるとしても、今のようじゃ人の前へ出て教育を受けたと威張れないからつまり損になるばかりだ。資本などはどうでもいいから、これを学資にして勉強してやろう。六百円を三に割って一年に二百円ずつ使えば三年間は勉強が出来る。三年間一生懸命にやれば何か出来る。それからどこの学校へはいろうと考え�
 ��が、学問は生来《しょうらい》どれもこれも好きでない。ことに語学とか文学とか云うものは真平《まっぴら》ご免《めん》だ。新体詩などと来ては二十行あるうちで一行も分らない。どうせ嫌いなものなら何をやっても同じ事だと思ったが、幸い物理学校の前を通り掛《かか》ったら生徒募集の広告が出ていたから、何も縁だと思って規則書をもらってすぐ入学の手続きをしてしまった。今考えるとこれも親譲りの無鉄砲から起《おこ》った失策だ。　三年間まあ人並《ひとなみ》に勉強はしたが別段たちのいい方でもないから、席順はいつでも下から勘定《かんじょう》する方が便利であった。しかし不思議なもので、三年立ったらとうとう卒業してしまった。自分でも可笑《おか》しいと思ったが苦情を云う訳もないから大人しく�
 �業しておいた。　卒業してから八日目に校長が呼びに来たから、何か用だろうと思って、出掛けて行ったら、四国辺のある中学校で数学の教師が入る。月給は四十円だが、行ってはどうだという相談である。おれは三年間学問はしたが実を云うと教師になる気も、田舎《いなか》へ行く考えも何もなかった。もっとも教師以外に何をしようと云うあてもなかったから、この相談を受けた時、行きましょうと即席《そくせき》に返事をした。これも親譲りの無鉄砲が祟《たた》ったのである。　引き受けた以上は赴任《ふにん》せねばならぬ。この三年間は四畳半に蟄居《ちっきょ》して小言はただの一度も聞いた事がない。喧嘩もせずに済んだ。おれの生涯のうちでは比較的呑気《ひかくてきのんき》な時節であった。しかしこうなると四
 畳半も引き払わなければならん。生れてから東京以外に踏み出したのは、同級生と一所に鎌倉《かまくら》へ遠足した時ばかりである。今度は鎌倉どころではない。大変な遠くへ行かねばならぬ。地図で見ると海浜で針の先ほど小さく見える。どうせ碌な所ではあるまい。どんな町で、どんな人が住んでるか分らん。分らんでも困らない。心配にはならぬ。ただ行くばかりである。もっとも少々面倒臭い。　家を畳《たた》んでからも清の所へは折々行った。清の甥というのは存外結構な人である。おれが行《ゆ》くたびに、居《お》りさえすれば、何くれと款待《もて》なしてくれた。清はおれを前へ置いて、いろいろおれの自慢《じまん》を甥に聞かせた。今に学校を卒業すると麹町辺へ屋敷を買って役所へ通うのだなどと吹聴《ふい�
 ��ょう》した事もある。独りで極《き》めて一人《ひとり》で喋舌《しゃべ》るから、こっちは困《こ》まって顔を赤くした。それも一度や二度ではない。折々おれが小さい時寝小便をした事まで持ち出すには閉口した。甥は何と思って清の自慢を聞いていたか分らぬ。ただ清は昔風《むかしふう》の女だから、自分とおれの関係を封建《ほうけん》時代の主従《しゅじゅう》のように考えていた。自分の主人なら甥のためにも主人に相違ないと合点《がてん》したものらしい。甥こそいい面《つら》の皮だ。　いよいよ約束が極まって、もう立つと云う三日前に清を尋《たず》ねたら、北向きの三畳に風邪《かぜ》を引いて寝ていた。おれの来たのを見て起き直るが早いか、坊《ぼ》っちゃんいつ家《うち》をお持ちなさいますと聞いた�
 �卒業さえすれば金が自然とポッケットの中に湧いて来ると思っている。そんなにえらい人をつらまえて、まだ坊っちゃんと呼ぶのはいよいよ馬鹿気ている。おれは単簡に当分うちは持たない。田舎へ行くんだと云ったら、非常に失望した容子《ようす》で、胡麻塩《ごましお》の鬢《びん》の乱れをしきりに撫《な》でた。あまり気の毒だから「行《ゆ》く事は行くがじき帰る。来年の夏休みにはきっと帰る」と慰《なぐさ》めてやった。それでも妙な顔をしているから「何を見やげに買って来てやろう、何が欲しい」と聞いてみたら「越後《えちご》の笹飴《ささあめ》が食べたい」と云った。越後の笹飴なんて聞いた事もない。第一方角が違う。「おれの行く田舎には笹飴はなさそうだ」と云って聞かしたら「そんなら、どっちの見当
 です」と聞き返した。「西の方だよ」と云うと「箱根《はこね》のさきですか手前ですか」と問う。随分持てあました。　出立の日には朝から来て、いろいろ世話をやいた。来る途中《とちゅう》小間物屋で買って来た歯磨《はみがき》と楊子《ようじ》と手拭《てぬぐい》をズックの革鞄《かばん》に入れてくれた。そんな物は入らないと云ってもなかなか承知しない。車を並べて停車場へ着いて、プラットフォームの上へ出た時、車へ乗り込んだおれの顔をじっと見て「もうお別れになるかも知れません。随分ご機嫌《きげん》よう」と小さな声で云った。目に涙《なみだ》が一杯《いっぱい》たまっている。おれは泣かなかった。しかしもう少しで泣くところであった。汽車がよっぽど動き出してから、もう大丈夫《だいしょうぶ》�
 ��ろうと思って、窓から首を出して、振り向いたら、やっぱり立っていた。何だか大変小さく見えた。　　　　　二　ぶうと云《い》って汽船がとまると、艀《はしけ》が岸を離《はな》れて、漕《こ》ぎ寄せて来た。船頭は真《ま》っ裸《ぱだか》に赤ふんどしをしめている。野蛮《やばん》な所だ。もっともこの熱さでは着物はきられまい。日が強いので水がやに光る。見つめていても眼《め》がくらむ。事務員に聞いてみるとおれはここへ降りるのだそうだ。見るところでは大森《おおもり》ぐらいな漁村だ。人を馬鹿《ばか》にしていらあ、こんな所に我慢《がまん》が出来るものかと思ったが仕方がない。威勢《いせい》よく一番に飛び込んだ。続《つ》づいて五六人は乗ったろう。外に大きな箱《はこ》を四つばかり積み込ん�
 �赤ふんは岸へ漕ぎ戻《もど》して来た。陸《おか》へ着いた時も、いの一番に飛び上がって、いきなり、磯《いそ》に立っていた鼻たれ小僧《こぞう》をつらまえて中学校はどこだと聞いた。小僧はぼんやりして、知らんがの、と云った。気の利かぬ田舎《いなか》ものだ。猫《ねこ》の額ほどな町内の癖《くせ》に、中学校のありかも知らぬ奴《やつ》があるものか。ところへ妙《みょう》な筒《つつ》っぽうを着た男がきて、こっちへ来いと云うから、尾《つ》いて行ったら、港屋とか云う宿屋へ連れて来た。やな女が声を揃《そろ》えてお上がりなさいと云うので、上がるのがいやになった。門口へ立ったなり中学校を教えろと云ったら、中学校はこれから汽車で二里ばかり行かなくっちゃいけないと聞いて、なお上がるのがいやに
 なった。おれは、筒っぽうを着た男から、おれの革鞄《かばん》を二つ引きたくって、のそのそあるき出した。宿屋のものは変な顔をしていた。　停車場はすぐ知れた。切符《きっぷ》も訳なく買った。乗り込んでみるとマッチ箱のような汽車だ。ごろごろと五分ばかり動いたと思ったら、もう降りなければならない。道理で切符が安いと思った。たった三銭である。それから車を傭《やと》って、中学校へ来たら、もう放課後で誰《だれ》も居ない。宿直はちょっと用達《ようたし》に出たと小使《こづかい》が教えた。随分《ずいぶん》気楽な宿直がいるものだ。校長でも尋《たず》ねようかと思ったが、草臥《くたび》れたから、車に乗って宿屋へ連れて行けと車夫に云い付けた。車夫は威勢よく山城屋《やましろや》と云ううちへ�
 ��付けにした。山城屋とは質屋の勘太郎《かんたろう》の屋号と同じだからちょっと面白く思った。　何だか二階の楷子段《はしごだん》の下の暗い部屋へ案内した。熱くって居られやしない。こんな部屋はいやだと云ったらあいにくみんな塞《ふさ》がっておりますからと云いながら革鞄を抛《ほう》り出したまま出て行った。仕方がないから部屋の中へはいって汗《あせ》をかいて我慢《がまん》していた。やがて湯に入れと云うから、ざぶりと飛び込んで、すぐ上がった。帰りがけに覗《のぞ》いてみると涼《すず》しそうな部屋がたくさん空いている。失敬な奴だ。嘘《うそ》をつきゃあがった。それから下女が膳《ぜん》を持って来た。部屋は熱《あ》つかったが、飯は下宿のよりも大分｜旨《うま》かった。給仕をしながら下�
 �がどちらからおいでになりましたと聞くから、東京から来たと答えた。すると東京はよい所でございましょうと云ったから当《あた》り前だと答えてやった。膳を下げた下女が台所へいった時分、大きな笑い声が聞《きこ》えた。くだらないから、すぐ寝《ね》たが、なかなか寝られない。熱いばかりではない。騒々《そうぞう》しい。下宿の五倍ぐらいやかましい。うとうとしたら清《きよ》の夢《ゆめ》を見た。清が越後《えちご》の笹飴《ささあめ》を笹ぐるみ、むしゃむしゃ食っている。笹は毒だからよしたらよかろうと云うと、いえこの笹がお薬でございますと云《い》って旨そうに食っている。おれがあきれ返って大きな口を開いてハハハハと笑ったら眼が覚めた。下女が雨戸を明けている。相変らず空の底が突《つ》き抜《
 ぬ》けたような天気だ。　道中《どうちゅう》をしたら茶代をやるものだと聞いていた。茶代をやらないと粗末《そまつ》に取り扱われると聞いていた。こんな、狭《せま》くて暗い部屋へ押《お》し込めるのも茶代をやらないせいだろう。見すぼらしい服装《なり》をして、ズックの革鞄と毛繻子《けじゅす》の蝙蝠傘《こうもり》を提げてるからだろう。田舎者の癖に人を見括《みくび》ったな。一番茶代をやって驚《おどろ》かしてやろう。おれはこれでも学資のあまりを三十円ほど懐《ふところ》に入れて東京を出て来たのだ。汽車と汽船の切符代と雑費を差し引いて、まだ十四円ほどある。みんなやったってこれからは月給を貰《もら》うんだから構わない。田舎者はしみったれだから五円もやれば驚《おど》ろいて眼を廻《ま�
 ��》すに極《きま》っている。どうするか見ろと済《すま》して顔を洗って、部屋へ帰って待ってると、夕べの下女が膳を持って来た。盆《ぼん》を持って給仕をしながら、やににやにや笑ってる。失敬な奴だ。顔のなかをお祭りでも通りゃしまいし。これでもこの下女の面《つら》よりよっぽど上等だ。飯を済ましてからにしようと思っていたが、癪《しゃく》に障《さわ》ったから、中途《ちゅうと》で五円｜札《さつ》を一｜枚《まい》出して、あとでこれを帳場へ持って行けと云ったら、下女は変な顔をしていた。それから飯を済ましてすぐ学校へ出懸《でか》けた。靴《くつ》は磨《みが》いてなかった。　学校は昨日《きのう》車で乗りつけたから、大概《たいがい》の見当は分っている。四つ角を二三度曲がったらすぐ門の�
 �へ出た。門から玄関《げんかん》までは御影石《みかげいし》で敷《し》きつめてある。きのうこの敷石の上を車でがらがらと通った時は、無暗《むやみ》に仰山《ぎょうさん》な音がするので少し弱った。途中から小倉《こくら》の制服を着た生徒にたくさん逢《あ》ったが、みんなこの門をはいって行く。中にはおれより背が高くって強そうなのが居る。あんな奴を教えるのかと思ったら何だか気味が悪《わ》るくなった。名刺《めいし》を出したら校長室へ通した。校長は薄髯《うすひげ》のある、色の黒い、目の大きな狸《たぬき》のような男である。やにもったいぶっていた。まあ精出して勉強してくれと云って、恭《うやうや》しく大きな印の捺《おさ》った、辞令を渡《わた》した。この辞令は東京へ帰るとき丸めて海の中
 へ抛り込《こ》んでしまった。校長は今に職員に紹介《しょうかい》してやるから、一々その人にこの辞令を見せるんだと云って聞かした。余計な手数だ。そんな面倒《めんどう》な事をするよりこの辞令を三日間職員室へ張り付ける方がましだ。　教員が控所《ひかえじょ》へ揃《そろ》うには一時間目の喇叭《らっぱ》が鳴らなくてはならぬ。大分時間がある。校長は時計を出して見て、追々《おいおい》ゆるりと話すつもりだが、まず大体の事を呑《の》み込んでおいてもらおうと云って、それから教育の精神について長いお談義を聞かした。おれは無論いい加減に聞いていたが、途中からこれは飛んだ所へ来たと思った。校長の云うようにはとても出来ない。おれみたような無鉄砲《むてっぽう》なものをつらまえて、生徒の模範�
 ��もはん》になれの、一校の師表《しひょう》と仰《あお》がれなくてはいかんの、学問以外に個人の徳化を及《およ》ぼさなくては教育者になれないの、と無暗に法外な注文をする。そんなえらい人が月給四十円で遥々《はるばる》こんな田舎へくるもんか。人間は大概似たもんだ。腹が立てば喧嘩《けんか》の一つぐらいは誰でもするだろうと思ってたが、この様子じゃめったに口も聞けない、散歩も出来ない。そんなむずかしい役なら雇《やと》う前にこれこれだと話すがいい。おれは嘘《うそ》をつくのが嫌《きら》いだから、仕方がない、だまされて来たのだとあきらめて、思い切りよく、ここで断《こと》わって帰っちまおうと思った。宿屋へ五円やったから財布《さいふ》の中には九円なにがししかない。九円じゃ東京まで�
 �帰れない。茶代なんかやらなければよかった。惜《お》しい事をした。しかし九円だって、どうかならない事はない。旅費は足りなくっても嘘をつくよりましだと思って、到底《とうてい》あなたのおっしゃる通りにゃ、出来ません、この辞令は返しますと云ったら、校長は狸のような眼をぱちつかせておれの顔を見ていた。やがて、今のはただ希望である、あなたが希望通り出来ないのはよく知っているから心配しなくってもいいと云いながら笑った。そのくらいよく知ってるなら、始めから威嚇《おどさ》さなければいいのに。　そう、こうする内に喇叭が鳴った。教場の方が急にがやがやする。もう教員も控所へ揃いましたろうと云うから、校長に尾いて教員控所へはいった。広い細長い部屋の周囲に机を並《なら》べてみんな腰《
 こし》をかけている。おれがはいったのを見て、みんな申し合せたようにおれの顔を見た。見世物じゃあるまいし。それから申し付けられた通り一人一人《ひとりびとり》の前へ行って辞令を出して挨拶《あいさつ》をした。大概《たいがい》は椅子《いす》を離れて腰をかがめるばかりであったが、念の入ったのは差し出した辞令を受け取って一応拝見をしてそれを恭《うやうや》しく返却《へんきゃく》した。まるで宮芝居の真似《まね》だ。十五人目に体操《たいそう》の教師へと廻って来た時には、同じ事を何返もやるので少々じれったくなった。向《むこ》うは一度で済む。こっちは同じ所作《しょさ》を十五返繰り返している。少しはひとの了見《りょうけん》も察してみるがいい。　挨拶をしたうちに教頭のなにがしと云う�
 ��が居た。これは文学士だそうだ。文学士と云えば大学の卒業生だからえらい人なんだろう。妙《みょう》に女のような優しい声を出す人だった。もっとも驚いたのはこの暑いのにフランネルの襯衣《しゃつ》を着ている。いくらか薄《うす》い地には相違《そうい》なくっても暑いには極ってる。文学士だけにご苦労千万な服装《なり》をしたもんだ。しかもそれが赤シャツだから人を馬鹿《ばか》にしている。あとから聞いたらこの男は年が年中赤シャツを着るんだそうだ。妙な病気があった者だ。当人の説明では赤は身体《からだ》に薬になるから、衛生のためにわざわざ誂《あつ》らえるんだそうだが、入らざる心配だ。そんならついでに着物も袴《はかま》も赤にすればいい。それから英語の教師に古賀《こが》とか云う大変顔�
 �の悪《わ》るい男が居た。大概顔の蒼《あお》い人は瘠《や》せてるもんだがこの男は蒼くふくれている。昔《むかし》小学校へ行く時分、浅井《あさい》の民《たみ》さんと云う子が同級生にあったが、この浅井のおやじがやはり、こんな色つやだった。浅井は百姓《ひゃくしょう》だから、百姓になるとあんな顔になるかと清に聞いてみたら、そうじゃありません、あの人はうらなりの唐茄子《とうなす》ばかり食べるから、蒼くふくれるんですと教えてくれた。それ以来蒼くふくれた人を見れば必ずうらなりの唐茄子を食った酬《むく》いだと思う。この英語の教師もうらなりばかり食ってるに違《ちが》いない。もっともうらなりとは何の事か今もって知らない。清に聞いてみた事はあるが、清は笑って答えなかった。大方清も知
 らないんだろう。それからおれと同じ数学の教師に堀田《ほった》というのが居た。これは逞《たくま》しい毬栗坊主《いがぐりぼうず》で、叡山《えいざん》の悪僧《あくそう》と云うべき面構《つらがまえ》である。人が叮寧《ていねい》に辞令を見せたら見向きもせず、やあ君が新任の人か、ちと遊びに来給《きたま》えアハハハと云った。何がアハハハだ。そんな礼儀《れいぎ》を心得ぬ奴の所へ誰が遊びに行くものか。おれはこの時からこの坊主に山嵐《やまあらし》という渾名《あだな》をつけてやった。漢学の先生はさすがに堅《かた》いものだ。昨日お着きで、さぞお疲れで、それでもう授業をお始めで、大分ご励精《れいせい》で、――とのべつに弁じたのは愛嬌《あいきょう》のあるお爺《じい》さんだ。画学の教師�
 ��全く芸人風だ。べらべらした透綾《すきや》の羽織を着て、扇子《せんす》をぱちつかせて、お国はどちらでげす、え？　東京？　そりゃ嬉《うれ》しい、お仲間が出来て……私《わたし》もこれで江戸《えど》っ子ですと云った。こんなのが江戸っ子なら江戸には生れたくないもんだと心中に考えた。そのほか一人一人についてこんな事を書けばいくらでもある。しかし際限がないからやめる。　挨拶が一通り済んだら、校長が今日はもう引き取ってもいい、もっとも授業上の事は数学の主任と打ち合せをしておいて、明後日《あさって》から課業を始めてくれと云った。数学の主任は誰かと聞いてみたら例の山嵐であった。忌々《いまいま》しい、こいつの下に働くのかおやおやと失望した。山嵐は「おい君どこに宿《とま》ってる�
 �、山城屋か、うん、今に行って相談する」と云い残して白墨《はくぼく》を持って教場へ出て行った。主任の癖に向うから来て相談するなんて不見識な男だ。しかし呼び付けるよりは感心だ。　それから学校の門を出て、すぐ宿へ帰ろうと思ったが、帰ったって仕方がないから、少し町を散歩してやろうと思って、無暗に足の向く方をあるき散らした。県庁も見た。古い前世紀の建築である。兵営も見た。麻布《あざぶ》の聯隊《れんたい》より立派でない。大通りも見た。神楽坂《かぐらざか》を半分に狭くしたぐらいな道幅《みちはば》で町並《まちなみ》はあれより落ちる。二十五万石の城下だって高の知れたものだ。こんな所に住んでご城下だなどと威張《いば》ってる人間は可哀想《かわいそう》なものだと考えながらくると、
 いつしか山城屋の前に出た。広いようでも狭いものだ。これで大抵《たいてい》は見尽《みつく》したのだろう。帰って飯でも食おうと門口をはいった。帳場に坐《すわ》っていたかみさんが、おれの顔を見ると急に飛び出してきてお帰り……と板の間へ頭をつけた。靴《くつ》を脱《ぬ》いで上がると、お座敷《ざしき》があきましたからと下女が二階へ案内をした。十五｜畳《じょう》の表二階で大きな床《とこ》の間《ま》がついている。おれは生れてからまだこんな立派な座敷へはいった事はない。この後いつはいれるか分らないから、洋服を脱いで浴衣《ゆかた》一枚になって座敷の真中《まんなか》へ大の字に寝てみた。いい心持ちである。　昼飯を食ってから早速清へ手紙をかいてやった。おれは文章がまずい上に字を知ら�
 ��いから手紙を書くのが大嫌《だいきら》いだ。またやる所もない。しかし清は心配しているだろう。難船して死にやしないかなどと思っちゃ困るから、奮発《ふんぱつ》して長いのを書いてやった。その文句はこうである。「きのう着いた。つまらん所だ。十五畳の座敷に寝ている。宿屋へ茶代を五円やった。かみさんが頭を板の間へすりつけた。夕べは寝られなかった。清が笹飴を笹ごと食う夢を見た。来年の夏は帰る。今日学校へ行ってみんなにあだなをつけてやった。校長は狸、教頭は赤シャツ、英語の教師はうらなり、数学は山嵐、画学はのだいこ。今にいろいろな事を書いてやる。さようなら」　手紙をかいてしまったら、いい心持ちになって眠気《ねむけ》がさしたから、最前のように座敷の真中へのびのびと大の字に寝た�
 �今度は夢も何も見ないでぐっすり寝た。この部屋かいと大きな声がするので目が覚めたら、山嵐がはいって来た。最前は失敬、君の受持ちは……と人が起き上がるや否や談判を開かれたので大いに狼狽《ろうばい》した。受持ちを聞いてみると別段むずかしい事もなさそうだから承知した。このくらいの事なら、明後日は愚《おろか》、明日《あした》から始めろと云ったって驚ろかない。授業上の打ち合せが済んだら、君はいつまでこんな宿屋に居るつもりでもあるまい、僕《ぼく》がいい下宿を周旋《しゅうせん》してやるから移りたまえ。外のものでは承知しないが僕が話せばすぐ出来る。早い方がいいから、今日見て、あす移って、あさってから学校へ行けば極りがいいと一人で呑み込んでいる。なるほど十五畳敷にいつまで居る
 訳にも行くまい。月給をみんな宿料《しゅくりょう》に払《はら》っても追っつかないかもしれぬ。五円の茶代を奮発《ふんぱつ》してすぐ移るのはちと残念だが、どうせ移る者なら、早く引き越《こ》して落ち付く方が便利だから、そこのところはよろしく山嵐に頼《たの》む事にした。すると山嵐はともかくもいっしょに来てみろと云うから、行った。町はずれの岡の中腹にある家で至極｜閑静《かんせい》だ。主人は骨董《こっとう》を売買するいか銀と云う男で、女房《にょうぼう》は亭主《ていしゅ》よりも四つばかり年嵩《としかさ》の女だ。中学校に居た時ウィッチと云う言葉を習った事があるがこの女房はまさにウィッチに似ている。ウィッチだって人の女房だから構わない。とうとう明日から引き移る事にした。帰りに�
 ��嵐は通町《とおりちょう》で氷水を一｜杯奢《ぱいおご》った。学校で逢った時はやに横風《おうふう》な失敬な奴だと思ったが、こんなにいろいろ世話をしてくれるところを見ると、わるい男でもなさそうだ。ただおれと同じようにせっかちで肝癪持《かんしゃくもち》らしい。あとで聞いたらこの男が一番生徒に人望があるのだそうだ。　　　　　三　いよいよ学校へ出た。初めて教場へはいって高い所へ乗った時は、何だか変だった。講釈をしながら、おれでも先生が勤まるのかと思った。生徒はやかましい。時々｜図抜《ずぬ》けた大きな声で先生と云《い》う。先生には応《こた》えた。今まで物理学校で毎日先生先生と呼びつけていたが、先生と呼ぶのと、呼ばれるのは雲泥《うんでい》の差だ。何だか足の裏がむずむずす�
 �。おれは卑怯《ひきょう》な人間ではない。臆病《おくびょう》な男でもないが、惜《お》しい事に胆力《たんりょく》が欠けている。先生と大きな声をされると、腹の減った時に丸の内で午砲《どん》を聞いたような気がする。最初の一時間は何だかいい加減にやってしまった。しかし別段困った質問も掛《か》けられずに済んだ。控所《ひかえじょ》へ帰って来たら、山嵐がどうだいと聞いた。うんと単簡に返事をしたら山嵐は安心したらしかった。　二時間目に白墨《はくぼく》を持って控所を出た時には何だか敵地へ乗り込《こ》むような気がした。教場へ出ると今度の組は前より大きな奴《やつ》ばかりである。おれは江戸《えど》っ子で華奢《きゃしゃ》に小作りに出来ているから、どうも高い所へ上がっても押《お》しが利
 かない。喧嘩《けんか》なら相撲取《すもうとり》とでもやってみせるが、こんな大僧《おおぞう》を四十人も前へ並《なら》べて、ただ一｜枚《まい》の舌をたたいて恐縮《きょうしゅく》させる手際はない。しかしこんな田舎者《いなかもの》に弱身を見せると癖《くせ》になると思ったから、なるべく大きな声をして、少々巻き舌で講釈してやった。最初のうちは、生徒も烟《けむ》に捲《ま》かれてぼんやりしていたから、それ見ろとますます得意になって、べらんめい調を用いてたら、一番前の列の真中《まんなか》に居た、一番強そうな奴が、いきなり起立して先生と云う。そら来たと思いながら、何だと聞いたら、「あまり早くて分からんけれ、もちっと、ゆるゆる遣《や》って、おくれんかな、もし」と云った。おくれん�
 ��な［＃「おくれんかな」に傍点］、もし［＃「もし」に傍点］は生温《なまぬ》るい言葉だ。早過ぎるなら、ゆっくり云ってやるが、おれは江戸っ子だから君等《きみら》の言葉は使えない、分《わか》らなければ、分るまで待ってるがいいと答えてやった。この調子で二時間目は思ったより、うまく行った。ただ帰りがけに生徒の一人がちょっとこの問題を解釈をしておくれんかな、もし、と出来そうもない幾何《きか》の問題を持って逼《せま》ったには冷汗《ひやあせ》を流した。仕方がないから何だか分らない、この次教えてやると急いで引き揚《あ》げたら、生徒がわあと囃《はや》した。その中に出来ん出来んと云う声が聞《きこ》える。箆棒《べらぼう》め、先生だって、出来ないのは当り前だ。出来ないのを出来ないと�
 �うのに不思議があるもんか。そんなものが出来るくらいなら四十円でこんな田舎へくるもんかと控所へ帰って来た。今度はどうだとまた山嵐が聞いた。うんと云ったが、うんだけでは気が済まなかったから、この学校の生徒は分らずやだなと云ってやった。山嵐は妙《みょう》な顔をしていた。　三時間目も、四時間目も昼過ぎの一時間も大同小異であった。最初の日に出た級は、いずれも少々ずつ失敗した。教師ははたで見るほど楽じゃないと思った。授業はひと通り済んだが、まだ帰れない、三時までぽつ然《ねん》として待ってなくてはならん。三時になると、受持級の生徒が自分の教室を掃除《そうじ》して報知《しらせ》にくるから検分をするんだそうだ。それから、出席簿《しゅっせきぼ》を一応調べてようやくお暇《ひま》
 が出る。いくら月給で買われた身体《からだ》だって、あいた時間まで学校へ縛《しば》りつけて机と睨《にら》めっくらをさせるなんて法があるものか。しかしほかの連中はみんな大人《おとな》しくご規則通りやってるから新参のおればかり、だだを捏《こ》ねるのもよろしくないと思って我慢《がまん》していた。帰りがけに、君何でもかんでも三時｜過《すぎ》まで学校にいさせるのは愚《おろか》だぜと山嵐に訴えたら、山嵐はそうさアハハハと笑ったが、あとから真面目《まじめ》になって、君あまり学校の不平を云うと、いかんぜ。云うなら僕《ぼく》だけに話せ、随分《ずいぶん》妙な人も居るからなと忠告がましい事を云った。四つ角で分れたから詳《くわ》しい事は聞くひまがなかった。　それからうちへ帰ってくる�
 ��、宿の亭主《ていしゅ》がお茶を入れましょうと云ってやって来る。お茶を入れると云うからご馳走《ちそう》をするのかと思うと、おれの茶を遠慮《えんりょ》なく入れて自分が飲むのだ。この様子では留守中《るすちゅう》も勝手にお茶を入れましょうを一人《ひとり》で履行《りこう》しているかも知れない。亭主が云うには手前は書画骨董《しょがこっとう》がすきで、とうとうこんな商買を内々で始めるようになりました。あなたもお見受け申すところ大分ご風流でいらっしゃるらしい。ちと道楽にお始めなすってはいかがですと、飛んでもない勧誘《かんゆう》をやる。二年前ある人の使《つかい》に帝国《ていこく》ホテルへ行った時は錠前《じょうまえ》直しと間違《まちが》えられた事がある。ケットを被《かぶ》っ�
 �、鎌倉《かまくら》の大仏を見物した時は車屋から親方と云われた。その外｜今日《こんにち》まで見損《みそくな》われた事は随分あるが、まだおれをつらまえて大分ご風流でいらっしゃると云ったものはない。大抵《たいてい》はなりや様子でも分る。風流人なんていうものは、画《え》を見ても、頭巾《ずきん》を被《かぶ》るか短冊《たんざく》を持ってるものだ。このおれを風流人だなどと真面目に云うのはただの曲者《くせもの》じゃない。おれはそんな呑気《のんき》な隠居《いんきょ》のやるような事は嫌《きら》いだと云ったら、亭主はへへへへと笑いながら、いえ始めから好きなものは、どなたもございませんが、いったんこの道にはいるとなかなか出られませんと一人で茶を注いで妙な手付《てつき》をして飲んで
 いる。実はゆうべ茶を買ってくれと頼《たの》んでおいたのだが、こんな苦い濃《こ》い茶はいやだ。一｜杯《ぱい》飲むと胃に答えるような気がする。今度からもっと苦くないのを買ってくれと云ったら、かしこまりましたとまた一杯しぼって飲んだ。人の茶だと思って無暗《むやみ》に飲む奴《やつ》だ。主人が引き下がってから、明日の下読《したよみ》をしてすぐ寝《ね》てしまった。　それから毎日毎日学校へ出ては規則通り働く、毎日毎日帰って来ると主人がお茶を入れましょうと出てくる。一週間ばかりしたら学校の様子もひと通りは飲み込めたし、宿の夫婦の人物も大概《たいがい》は分った。ほかの教師に聞いてみると辞令を受けて一週間から一ヶ月ぐらいの間は自分の評判がいいだろうか、悪《わ》るいだろうか非常�
 ��気に掛《か》かるそうであるが、おれは一向そんな感じはなかった。教場で折々しくじるとその時だけはやな心持ちだが三十分ばかり立つと奇麗《きれい》に消えてしまう。おれは何事によらず長く心配しようと思っても心配が出来ない男だ。教場のしくじりが生徒にどんな影響《えいきょう》を与《あた》えて、その影響が校長や教頭にどんな反応を呈《てい》するかまるで無頓着《むとんじゃく》であった。おれは前に云う通りあまり度胸の据《すわ》った男ではないのだが、思い切りはすこぶるいい人間である。この学校がいけなければすぐどっかへ行《ゆ》く覚悟《かくご》でいたから、狸《たぬき》も赤シャツも、ちっとも恐《おそろ》しくはなかった。まして教場の小僧《こぞう》共なんかには愛嬌《あいきょう》もお世辞�
 �使う気になれなかった。学校はそれでいいのだが下宿の方はそうはいかなかった。亭主が茶を飲みに来るだけなら我慢もするが、いろいろな者を持ってくる。始めに持って来たのは何でも印材で、十《とお》ばかり並《なら》べておいて、みんなで三円なら安い物だお買いなさいと云う。田舎巡《いなかまわ》りのヘボ絵師じゃあるまいし、そんなものは入らないと云ったら、今度は華山《かざん》とか何とか云う男の花鳥の掛物《かけもの》をもって来た。自分で床《とこ》の間《ま》へかけて、いい出来じゃありませんかと云うから、そうかなと好加減《いいかげん》に挨拶《あいさつ》をすると、華山には二人《ふたり》ある、一人は何とか華山で、一人は何とか華山ですが、この幅《ふく》はその何とか華山の方だと、くだらない
 講釈をしたあとで、どうです、あなたなら十五円にしておきます。お買いなさいと催促《さいそく》をする。金がないと断わると、金なんか、いつでもようございますとなかなか頑固《がんこ》だ。金があつても買わないんだと、その時は追っ払《ぱら》っちまった。その次には鬼瓦《おにがわら》ぐらいな大硯《おおすずり》を担ぎ込んだ。これは端渓《たんけい》です、端渓ですと二｜遍《へん》も三遍も端渓がるから、面白半分に端渓た何だいと聞いたら、すぐ講釈を始め出した。端渓には上層中層下層とあって、今時のものはみんな上層ですが、これはたしかに中層です、この眼《がん》をご覧なさい。眼が三つあるのは珍《めず》らしい。溌墨《はつぼく》の具合も至極よろしい、試してご覧なさいと、おれの前へ大きな硯を突�
 ��つ》きつける。いくらだと聞くと、持主が支那《しな》から持って帰って来て是非売りたいと云いますから、お安くして三十円にしておきましょうと云う。この男は馬鹿《ばか》に相違《そうい》ない。学校の方はどうかこうか無事に勤まりそうだが、こう骨董責《こっとうぜめ》に逢《あ》ってはとても長く続きそうにない。　そのうち学校もいやになった。　　ある日の晩｜大町《おおまち》と云う所を散歩していたら郵便局の隣《とな》りに蕎麦《そば》とかいて、下に東京と注を加えた看板があった。おれは蕎麦が大好きである。東京に居《お》った時でも蕎麦屋の前を通って薬味の香《にお》いをかぐと、どうしても暖簾《のれん》がくぐりたくなった。今日までは数学と骨董で蕎麦を忘れていたが、こうして看板を見ると素�
 �りが出来なくなる。ついでだから一杯食って行こうと思って上がり込んだ。見ると看板ほどでもない。東京と断《こと》わる以上はもう少し奇麗にしそうなものだが、東京を知らないのか、金がないのか、滅法《めっぽう》きたない。畳《たたみ》は色が変ってお負けに砂でざらざらしている。壁《かべ》は煤《すす》で真黒《まっくろ》だ。天井《てんじょう》はランプの油烟《ゆえん》で燻《くす》ぼってるのみか、低くって、思わず首を縮めるくらいだ。ただ麗々と蕎麦の名前をかいて張り付けたねだん付けだけは全く新しい。何でも古いうちを買って二三日《にさんち》前から開業したに違《ちが》いなかろう。ねだん付の第一号に天麩羅《てんぷら》とある。おい天麩羅を持ってこいと大きな声を出した。するとこの時まで隅《
 すみ》の方に三人かたまって、何かつるつる、ちゅうちゅう食ってた連中《れんじゅう》が、ひとしくおれの方を見た。部屋《へや》が暗いので、ちょっと気がつかなかったが顔を合せると、みんな学校の生徒である。先方で挨拶《あいさつ》をしたから、おれも挨拶をした。その晩は久《ひさ》し振《ぶり》に蕎麦を食ったので、旨《うま》かったから天麩羅を四杯｜平《たいら》げた。　翌日何の気もなく教場へはいると、黒板一杯ぐらいな大きな字で、天麩羅先生とかいてある。おれの顔を見てみんなわあと笑った。おれは馬鹿馬鹿しいから、天麩羅を食っちゃ可笑《おか》しいかと聞いた。すると生徒の一人《ひとり》が、しかし四杯は過ぎるぞな、もし、と云った。四杯食おうが五杯食おうがおれの銭でおれが食うのに文句があ�
 ��もんかと、さっさと講義を済まして控所へ帰って来た。十分立って次の教場へ出ると一つ天麩羅四杯なり。但《ただ》し笑うべからず。と黒板にかいてある。さっきは別に腹も立たなかったが今度は癪《しゃく》に障《さわ》った。冗談《じょうだん》も度を過ごせばいたずらだ。焼餅《やきもち》の黒焦《くろこげ》のようなもので誰《だれ》も賞《ほ》め手はない。田舎者はこの呼吸が分からないからどこまで押《お》して行っても構わないと云う了見《りょうけん》だろう。一時間あるくと見物する町もないような狭《せま》い都に住んで、外に何にも芸がないから、天麩羅事件を日露《にちろ》戦争のように触《ふ》れちらかすんだろう。憐《あわ》れな奴等《やつら》だ。小供の時から、こんなに教育されるから、いやにひね�
 �こびた、植木鉢《うえきばち》の楓《かえで》みたような小人《しょうじん》が出来るんだ。無邪気《むじゃき》ならいっしょに笑ってもいいが、こりゃなんだ。小供の癖《くせ》に乙《おつ》に毒気を持ってる。おれはだまって、天麩羅を消して、こんないたずらが面白いか、卑怯《ひきょう》な冗談だ。君等は卑怯と云う意味を知ってるか、と云ったら、自分がした事を笑われて怒《おこ》るのが卑怯じゃろうがな、もしと答えた奴がある。やな奴だ。わざわざ東京から、こんな奴を教えに来たのかと思ったら情なくなった。余計な減らず口を利かないで勉強しろと云って、授業を始めてしまった。それから次の教場へ出たら天麩羅を食うと減らず口が利きたくなるものなりと書いてある。どうも始末に終えない。あんまり腹が立った
 から、そんな生意気な奴は教えないと云ってすたすた帰って来てやった。生徒は休みになって喜んだそうだ。こうなると学校より骨董の方がまだましだ。　天麩羅蕎麦もうちへ帰って、一晩寝たらそんなに肝癪《かんしゃく》に障らなくなった。学校へ出てみると、生徒も出ている。何だか訳が分らない。それから三日ばかりは無事であったが、四日目の晩に住田《すみた》と云う所へ行って団子《だんご》を食った。この住田と云う所は温泉のある町で城下から汽車だと十分ばかり、歩いて三十分で行かれる、料理屋も温泉宿も、公園もある上に遊廓《ゆうかく》がある。おれのはいった団子屋は遊廓の入口にあって、大変うまいという評判だから、温泉に行った帰りがけにちょっと食ってみた。今度は生徒にも逢わなかったから、誰《�
 ��れ》も知るまいと思って、翌日学校へ行って、一時間目の教場へはいると団子二｜皿《さら》七銭と書いてある。実際おれは二皿食って七銭｜払《はら》った。どうも厄介《やっかい》な奴等だ。二時間目にもきっと何かあると思うと遊廓の団子旨い旨いと書いてある。あきれ返った奴等だ。団子がそれで済んだと思ったら今度は赤手拭《あかてぬぐい》と云うのが評判になった。何の事だと思ったら、つまらない来歴だ。おれはここへ来てから、毎日住田の温泉へ行く事に極《き》めている。ほかの所は何を見ても東京の足元にも及《およ》ばないが温泉だけは立派なものだ。せっかく来た者だから毎日はいってやろうという気で、晩飯前に運動かたがた出掛《でかけ》る。ところが行くときは必ず西洋手拭の大きな奴をぶら下げて行�
 �。この手拭が湯に染《そま》った上へ、赤い縞《しま》が流れ出したのでちょっと見ると紅色《べにいろ》に見える。おれはこの手拭を行きも帰りも、汽車に乗ってもあるいても、常にぶら下げている。それで生徒がおれの事を赤手拭赤手拭と云うんだそうだ。どうも狭い土地に住んでるとうるさいものだ。まだある。温泉は三階の新築で上等は浴衣《ゆかた》をかして、流しをつけて八銭で済む。その上に女が天目《てんもく》へ茶を載《の》せて出す。おれはいつでも上等へはいった。すると四十円の月給で毎日上等へはいるのは贅沢《ぜいたく》だと云い出した。余計なお世話だ。まだある。湯壺《ゆつぼ》は花崗石《みかげいし》を畳《たた》み上げて、十五｜畳敷《じょうじき》ぐらいの広さに仕切ってある。大抵《たいてい》
 は十三四人｜漬《つか》ってるがたまには誰も居ない事がある。深さは立って乳の辺まであるから、運動のために、湯の中を泳ぐのはなかなか愉快《ゆかい》だ。おれは人の居ないのを見済《みすま》しては十五畳の湯壺を泳ぎ巡《まわ》って喜んでいた。ところがある日三階から威勢《いせい》よく下りて今日も泳げるかなとざくろ口を覗《のぞ》いてみると、大きな札へ黒々と湯の中で泳ぐべからずとかいて貼《は》りつけてある。湯の中で泳ぐものは、あまりあるまいから、この貼札《はりふだ》はおれのために特別に新調したのかも知れない。おれはそれから泳ぐのは断念した。泳ぐのは断念したが、学校へ出てみると、例の通り黒板に湯の中で泳ぐべからずと書いてあるには驚《おど》ろいた。何だか生徒全体がおれ一人を探偵�
 ��たんてい》しているように思われた。くさくさした。生徒が何を云ったって、やろうと思った事をやめるようなおれではないが、何でこんな狭苦しい鼻の先がつかえるような所へ来たのかと思うと情なくなった。それでうちへ帰ると相変らず骨董責である。　　　　　四　学校には宿直があって、職員が代る代るこれをつとめる。但《ただ》し狸《たぬき》と赤シャツは例外である。何でこの両人が当然の義務を免《まぬ》かれるのかと聞いてみたら、奏任待遇《そうにんたいぐう》だからと云う。面白くもない。月給はたくさんとる、時間は少ない、それで宿直を逃《の》がれるなんて不公平があるものか。勝手な規則をこしらえて、それが当《あた》り前《まえ》だというような顔をしている。よくまああんなにずうずうしく出来る�
 �のだ。これについては大分不平であるが、山嵐《やまあらし》の説によると、いくら一人《ひとり》で不平を並《なら》べたって通るものじゃないそうだ。一人だって二人《ふたり》だって正しい事なら通りそうなものだ。山嵐は might is right という英語を引いて説諭《せつゆ》を加えたが、何だか要領を得ないから、聞き返してみたら強者の権利と云う意味だそうだ。強者の権利ぐらいなら昔《むかし》から知っている。今さら山嵐から講釈をきかなくってもいい。強者の権利と宿直とは別問題だ。狸や赤シャツが強者だなんて、誰《だれ》が承知するものか。議論は議論としてこの宿直がいよいよおれの番に廻《まわ》って来た。一体｜疳性《かんしょう》だから夜具蒲団《やぐふとん》などは自分のものへ楽に寝ないと寝たような心�
 �ちがしない。小供の時から、友達のうちへ泊《とま》った事はほとんどないくらいだ。友達のうちでさえ厭《いや》なら学校の宿直はなおさら厭だ。厭だけれども、これが四十円のうちへ籠《こも》っているなら仕方がない。我慢《がまん》して勤めてやろう。　教師も生徒も帰ってしまったあとで、一人ぽかんとしているのは随分《ずいぶん》間が抜《ぬ》けたものだ。宿直部屋は教場の裏手にある寄宿舎の西はずれの一室だ。ちょっとはいってみたが、西日をまともに受けて、苦しくって居たたまれない。田舎《いなか》だけあって秋がきても、気長に暑いもんだ。生徒の賄《まかない》を取りよせて晩飯を済ましたが、まずいには恐《おそ》れ入《い》った。よくあんなものを食って、あれだけに暴れられたもんだ。それで晩飯を急
 いで四時半に片付けてしまうんだから豪傑《ごうけつ》に違《ちが》いない。飯は食ったが、まだ日が暮《く》れないから寝《ね》る訳に行かない。ちょっと温泉に行きたくなった。宿直をして、外へ出るのはいい事だか、悪《わ》るい事だかしらないが、こうつくねんとして重禁錮《じゅうきんこ》同様な憂目《うきめ》に逢《あ》うのは我慢の出来るもんじゃない。始めて学校へ来た時当直の人はと聞いたら、ちょっと用達《ようたし》に出たと小使《こづかい》が答えたのを妙《みょう》だと思ったが、自分に番が廻《まわ》ってみると思い当る。出る方が正しいのだ。おれは小使にちょっと出てくると云ったら、何かご用ですかと聞くから、用じゃない、温泉へはいるんだと答えて、さっさと出掛《でか》けた。赤手拭《あかてぬ�
 ��い》は宿へ忘れて来たのが残念だが今日は先方で借りるとしよう。　それからかなりゆるりと、出たりはいったりして、ようやく日暮方《ひぐれがた》になったから、汽車へ乗って古町《こまち》の停車場《ていしゃば》まで来て下りた。学校まではこれから四丁だ。訳はないとあるき出すと、向うから狸が来た。狸はこれからこの汽車で温泉へ行こうと云う計画なんだろう。すたすた急ぎ足にやってきたが、擦《す》れ違《ちが》った時おれの顔を見たから、ちょっと挨拶《あいさつ》をした。すると狸はあなたは今日は宿直ではなかったですかねえ［＃「なかったですかねえ」に傍点］と真面目《まじめ》くさって聞いた。なかったですかねえもないもんだ。二時間前おれに向って今夜は始めての宿直ですね。ご苦労さま。と礼を云�
 �たじゃないか。校長なんかになるといやに曲りくねった言葉を使うもんだ。おれは腹が立ったから、ええ宿直です。宿直ですから、これから帰って泊る事はたしかに泊りますと云い捨てて済ましてあるき出した。竪町《たてまち》の四つ角までくると今度は山嵐《やまあらし》に出っ喰《く》わした。どうも狭《せま》い所だ。出てあるきさえすれば必ず誰かに逢う。「おい君は宿直じゃないか」と聞くから「うん、宿直だ」と答えたら、「宿直が無暗《むやみ》に出てあるくなんて、不都合《ふつごう》じゃないか」と云った。「ちっとも不都合なもんか、出てあるかない方が不都合だ」と威張《いば》ってみせた。「君のずぼらにも困るな、校長か教頭に出逢うと面倒《めんどう》だぜ」と山嵐に似合わない事を云うから「校長にはた
 った今逢った。暑い時には散歩でもしないと宿直も骨でしょうと校長が、おれの散歩をほめたよ」と云って、面倒｜臭《くさ》いから、さっさと学校へ帰って来た。　それから日はすぐくれる。くれてから二時間ばかりは小使を宿直部屋へ呼んで話をしたが、それも飽《あ》きたから、寝られないまでも床《とこ》へはいろうと思って、寝巻に着換《きが》えて、蚊帳《かや》を捲《ま》くって、赤い毛布《けっと》を跳《は》ねのけて、とんと尻持《しりもち》を突《つ》いて、仰向《あおむ》けになった。おれが寝るときにとんと尻持をつくのは小供の時からの癖《くせ》だ。わるい癖だと云って小川町《おがわまち》の下宿に居た時分、二階下に居た法律学校の書生が苦情を持ち込《こ》んだ事がある。法律の書生なんてものは弱い�
 ��に、やに口が達者なもので、愚《ぐ》な事を長たらしく述べ立てるから、寝る時にどんどん音がするのはおれの尻がわるいのじゃない。下宿の建築が粗末《そまつ》なんだ。掛《か》ケ合うなら下宿へ掛ケ合えと凹《へこ》ましてやった。この宿直部屋は二階じゃないから、いくら、どしんと倒《たお》れても構わない。なるべく勢《いきおい》よく倒れないと寝たような心持ちがしない。ああ愉快だと足をうんと延ばすと、何だか両足へ飛び付いた。ざらざらして蚤《のみ》のようでもないからこいつあと驚《おど》ろいて、足を二三度｜毛布《けっと》の中で振《ふ》ってみた。するとざらざらと当ったものが、急に殖《ふ》え出して脛《すね》が五六カ所、股《もも》が二三カ所、尻の下でぐちゃりと踏《ふ》み潰《つぶ》したの�
 �一つ、臍《へそ》の所まで飛び上がったのが一つ――いよいよ驚ろいた。早速《さっそく》起き上《あが》って、毛布《けっと》をぱっと後ろへ抛《ほう》ると、蒲団の中から、バッタが五六十飛び出した。正体の知れない時は多少気味が悪《わ》るかったが、バッタと相場が極《き》まってみたら急に腹が立った。バッタの癖に人を驚ろかしやがって、どうするか見ろと、いきなり括《くく》り枕《まくら》を取って、二三度｜擲《たた》きつけたが、相手が小さ過ぎるから勢よく抛《な》げつける割に利目《ききめ》がない。仕方がないから、また布団の上へ坐《すわ》って、煤掃《すすはき》の時に蓙《ござ》を丸めて畳《たたみ》を叩《たた》くように、そこら近辺を無暗にたたいた。バッタが驚ろいた上に、枕の勢で飛び上がる
 ものだから、おれの肩《かた》だの、頭だの鼻の先だのへくっ付いたり、ぶつかったりする。顔へ付いた奴《やつ》は枕で叩く訳に行かないから、手で攫《つか》んで、一生懸命に擲きつける。忌々《いまいま》しい事に、いくら力を出しても、ぶつかる先が蚊帳だから、ふわりと動くだけで少しも手答がない。バッタは擲きつけられたまま蚊帳へつらまっている。死にもどうもしない。ようやくの事に三十分ばかりでバッタは退治《たいじ》た。箒《ほうき》を持って来てバッタの死骸《しがい》を掃き出した。小使が来て何ですかと云うから、何ですかもあるもんか、バッタを床の中に飼《か》っとく奴がどこの国にある。間抜《まぬけ》め。と叱《しか》ったら、私は存じませんと弁解をした。存じませんで済むかと箒を椽側《えん�
 ��わ》へ抛《ほう》り出したら、小使は恐る恐る箒を担いで帰って行った。　おれは早速寄宿生を三人ばかり総代に呼び出した。すると六人出て来た。六人だろうが十人だろうが構うものか。寝巻のまま腕《うで》まくりをして談判を始めた。「なんでバッタなんか、おれの床の中へ入れた」「バッタた何ぞな」と真先《まっさき》の一人がいった。やに落ち付いていやがる。この学校じゃ校長ばかりじゃない、生徒まで曲りくねった言葉を使うんだろう。「バッタを知らないのか、知らなけりゃ見せてやろう」と云ったが、生憎《あいにく》掃き出してしまって一｜匹《ぴき》も居ない。また小使を呼んで、「さっきのバッタを持ってこい」と云ったら、「もう掃溜《はきだめ》へ棄《す》ててしまいましたが、拾って参りましょうか」�
 �聞いた。「うんすぐ拾って来い」と云うと小使は急いで馳《か》け出したが、やがて半紙の上へ十匹ばかり載《の》せて来て「どうもお気の毒ですが、生憎夜でこれだけしか見当りません。あしたになりましたらもっと拾って参ります」と云う。小使まで馬鹿《ばか》だ。おれはバッタの一つを生徒に見せて「バッタたこれだ、大きなずう体をして、バッタを知らないた、何の事だ」と云うと、一番左の方に居た顔の丸い奴が「そりゃ、イナゴぞな、もし」と生意気におれを遣《や》り込《こ》めた。「篦棒《べらぼう》め、イナゴもバッタも同じもんだ。第一先生を捕《つら》まえてなもし［＃「なもし」に傍点］た何だ。菜飯《なめし》は田楽《でんがく》の時より外に食うもんじゃない」とあべこべに遣り込めてやったら「なもしと
 菜飯とは違うぞな、もし」と云った。いつまで行ってもなもし［＃「なもし」に傍点］を使う奴だ。「イナゴでもバッタでも、何でおれの床の中へ入れたんだ。おれがいつ、バッタを入れてくれと頼《たの》んだ」「誰も入れやせんがな」「入れないものが、どうして床の中に居るんだ」「イナゴは温《ぬく》い所が好きじゃけれ、大方一人でおはいりたのじゃあろ」「馬鹿あ云え。バッタが一人でおはいりになるなんて――バッタにおはいりになられてたまるもんか。――さあなぜこんないたずらをしたか、云え」「云えてて、入れんものを説明しようがないがな」　けちな奴等《やつら》だ。自分で自分のした事が云えないくらいなら、てんでしないがいい。証拠《しょうこ》さえ挙がらなければ、しらを切るつもりで図太く構えてい�
 ��がる。おれだって中学に居た時分は少しはいたずらもしたもんだ。しかしだれがしたと聞かれた時に、尻込みをするような卑怯《ひきょう》な事はただの一度もなかった。したものはしたので、しないものはしないに極《きま》ってる。おれなんぞは、いくら、いたずらをしたって潔白なものだ。嘘を吐《つ》いて罰《ばつ》を逃《に》げるくらいなら、始めからいたずらなんかやるものか。いたずらと罰はつきもんだ。罰があるからいたずらも心持ちよく出来る。いたずらだけで罰はご免蒙《めんこうむ》るなんて下劣《げれつ》な根性がどこの国に流行《はや》ると思ってるんだ。金は借りるが、返す事はご免だと云う連中はみんな、こんな奴等が卒業してやる仕事に相違《そうい》ない。全体中学校へ何しにはいってるんだ。学校�
 �はいって、嘘を吐いて、胡魔化《ごまか》して、陰《かげ》でこせこせ生意気な悪いたずらをして、そうして大きな面で卒業すれば教育を受けたもんだと癇違《かんちが》いをしていやがる。話せない雑兵《ぞうひょう》だ。　おれはこんな腐《くさ》った了見《りょうけん》の奴等と談判するのは胸糞《むなくそ》が悪《わ》るいから、「そんなに云われなきゃ、聞かなくっていい。中学校へはいって、上品も下品も区別が出来ないのは気の毒なものだ」と云って六人を逐《お》っ放《ぱな》してやった。おれは言葉や様子こそあまり上品じゃないが、心はこいつらよりも遥《はる》かに上品なつもりだ。六人は悠々《ゆうゆう》と引き揚《あ》げた。上部《うわべ》だけは教師のおれよりよっぽどえらく見える。実は落ち付いているだ
 けなお悪るい。おれには到底《とうてい》これほどの度胸はない。　それからまた床へはいって横になったら、さっきの騒動《そうどう》で蚊帳の中はぶんぶん唸《うな》っている。手燭《てしょく》をつけて一匹ずつ焼くなんて面倒な事は出来ないから、釣手《つりて》をはずして、長く畳《たた》んでおいて部屋の中で横竪《よこたて》十文字に振《ふる》ったら、環《かん》が飛んで手の甲《こう》をいやというほど撲《ぶ》った。三度目に床へはいった時は少々落ち付いたがなかなか寝られない。時計を見ると十時半だ。考えてみると厄介な所へ来たもんだ。一体中学の先生なんて、どこへ行っても、こんなものを相手にするなら気の毒なものだ。よく先生が品切れにならない。よっぽど辛防《しんぼう》強い朴念仁《ぼくねんじ�
 ��》がなるんだろう。おれには到底やり切れない。それを思うと清《きよ》なんてのは見上げたものだ。教育もない身分もない婆《ばあ》さんだが、人間としてはすこぶる尊《たっ》とい。今まではあんなに世話になって別段｜難有《ありがた》いとも思わなかったが、こうして、一人で遠国へ来てみると、始めてあの親切がわかる。越後《えちご》の笹飴《ささあめ》が食いたければ、わざわざ越後まで買いに行って食わしてやっても、食わせるだけの価値は充分《じゅうぶん》ある。清はおれの事を欲がなくって、真直《まっすぐ》な気性だと云って、ほめるが、ほめられるおれよりも、ほめる本人の方が立派な人間だ。何だか清に逢いたくなった。　清の事を考えながら、のつそつしていると、突然《とつぜん》おれの頭の上で、数�
 �云ったら三四十人もあろうか、二階が落っこちるほどどん、どん、どんと拍子《ひょうし》を取って床板を踏みならす音がした。すると足音に比例した大きな鬨《とき》の声が起《おこ》った。おれは何事が持ち上がったのかと驚ろいて飛び起きた。飛び起きる途端《とたん》に、ははあさっきの意趣返《いしゅがえ》しに生徒があばれるのだなと気がついた。手前のわるい事は悪るかったと言ってしまわないうちは罪は消えないもんだ。わるい事は、手前達に覚《おぼえ》があるだろう。本来なら寝てから後悔《こうかい》してあしたの朝でもあやまりに来るのが本筋だ。たとい、あやまらないまでも恐れ入って、静粛《せいしゅく》に寝ているべきだ。それを何だこの騒《さわ》ぎは。寄宿舎を建てて豚《ぶた》でも飼っておきあしま
 いし。気狂《きちが》いじみた真似《まね》も大抵《たいてい》にするがいい。どうするか見ろと、寝巻のまま宿直部屋を飛び出して、楷子段《はしごだん》を三股半《みまたはん》に二階まで躍《おど》り上がった。すると不思議な事に、今まで頭の上で、たしかにどたばた暴れていたのが、急に静まり返って、人声どころか足音もしなくなった。これは妙だ。ランプはすでに消してあるから、暗くてどこに何が居るか判然と分《わか》らないが、人気《ひとけ》のあるとないとは様子でも知れる。長く東から西へ貫《つらぬ》いた廊下《ろうか》には鼠《ねずみ》一｜匹《ぴき》も隠《かく》れていない。廊下のはずれから月がさして、遥か向うが際どく明るい。どうも変だ、おれは小供の時から、よく夢《ゆめ》を見る癖があって、�
 ��中《むちゅう》に跳ね起きて、わからぬ寝言を云って、人に笑われた事がよくある。十六七の時ダイヤモンドを拾った夢を見た晩なぞは、むくりと立ち上がって、そばに居た兄に、今のダイヤモンドはどうしたと、非常な勢《いきおい》で尋《たず》ねたくらいだ。その時は三日ばかりうち中《じゅう》の笑い草になって大いに弱った。ことによると今のも夢かも知れない。しかしたしかにあばれたに違いないがと、廊下の真中《まんなか》で考え込んでいると、月のさしている向うのはずれで、一二三わあと、三四十人の声がかたまって響《ひび》いたかと思う間もなく、前のように拍子を取って、一同が床板《ゆかいた》を踏み鳴らした。それ見ろ夢じゃないやっぱり事実だ。静かにしろ、夜なかだぞ、とこっちも負けんくらいな声�
 �出して、廊下を向うへ馳《か》けだした。おれの通る路《みち》は暗い、ただはずれに見える月あかりが目標《めじるし》だ。おれが馳け出して二間も来たかと思うと、廊下の真中で、堅《かた》い大きなものに向脛《むこうずね》をぶつけて、あ痛い［＃「あ痛い」に傍点］が頭へひびく間に、身体はすとんと前へ抛《ほう》り出された。こん畜生《ちきしょう》と起き上がってみたが、馳けられない。気はせくが、足だけは云う事を利かない。じれったいから、一本足で飛んで来たら、もう足音も人声も静まり返って、森《しん》としている。いくら人間が卑怯だって、こんなに卑怯に出来るものじゃない。まるで豚だ。こうなれば隠れている奴を引きずり出して、あやまらせてやるまではひかないぞと、心を極《き》めて寝室《しん
 しつ》の一つを開けて中を検査しようと思ったが開かない。錠《じょう》をかけてあるのか、机か何か積んで立て懸《か》けてあるのか、押《お》しても、押しても決して開かない。今度は向う合せの北側の室《へや》を試みた。開かない事はやっぱり同然である。おれが戸を開けて中に居る奴を引っ捕《つ》らまえてやろうと、焦慮《いらっ》てると、また東のはずれで鬨の声と足拍子が始まった。この野郎《やろう》申し合せて、東西相応じておれを馬鹿にする気だな、とは思ったがさてどうしていいか分らない。正直に白状してしまうが、おれは勇気のある割合に智慧《ちえ》が足りない。こんな時にはどうしていいかさっぱりわからない。わからないけれども、決して負けるつもりはない。このままに済ましてはおれの顔にかかわ�
 ��。江戸《えど》っ子は意気地《いくじ》がないと云われるのは残念だ。宿直をして鼻垂《はなった》れ小僧《こぞう》にからかわれて、手のつけようがなくって、仕方がないから泣き寝入りにしたと思われちゃ一生の名折れだ。これでも元は旗本《はたもと》だ。旗本の元は清和源氏《せいわげんじ》で、多田《ただ》の満仲《まんじゅう》の後裔《こうえい》だ。こんな土百姓《どびゃくしょう》とは生まれからして違うんだ。ただ智慧のないところが惜しいだけだ。どうしていいか分らないのが困るだけだ。困ったって負けるものか。正直だから、どうしていいか分らないんだ。世の中に正直が勝たないで、外に勝つものがあるか、考えてみろ。今夜中に勝てなければ、あした勝つ。あした勝てなければ、あさって勝つ。あさって勝�
 �なければ、下宿から弁当を取り寄せて勝つまでここに居る。おれはこう決心をしたから、廊下の真中へあぐらをかいて夜のあけるのを待っていた。蚊がぶんぶん来たけれども何ともなかった。さっき、ぶつけた向脛を撫《な》でてみると、何だかぬらぬらする。血が出るんだろう。血なんか出たければ勝手に出るがいい。そのうち最前からの疲《つか》れが出て、ついうとうと寝てしまった。何だか騒がしいので、眼《め》が覚めた時はえっ糞《くそ》しまったと飛び上がった。おれの坐《すわ》ってた右側にある戸が半分あいて、生徒が二人、おれの前に立っている。おれは正気に返って、はっと思う途端に、おれの鼻の先にある生徒の足を引《ひ》っ攫《つか》んで、力任せにぐいと引いたら、そいつは、どたりと仰向《あおむけ》に
 倒れた。ざまを見ろ。残る一人がちょっと狼狽《ろうばい》したところを、飛びかかって、肩を抑《おさ》えて二三度こづき廻したら、あっけに取られて、眼をぱちぱちさせた。さあおれの部屋まで来いと引っ立てると、弱虫だと見えて、一も二もなく尾《つ》いて来た。夜《よ》はとうにあけている。　おれが宿直部屋へ連れてきた奴を詰問《きつもん》し始めると、豚は、打《ぶ》っても擲いても豚だから、ただ知らんがなで、どこまでも通す了見と見えて、けっして白状しない。そのうち一人来る、二人来る、だんだん二階から宿直部屋へ集まってくる。見るとみんな眠《ねむ》そうに瞼《まぶた》をはらしている。けちな奴等だ。一晩ぐらい寝ないで、そんな面をして男と云われるか。面でも洗って議論に来いと云ってやったが、�
 ��も面を洗いに行かない。　おれは五十人あまりを相手に約一時間ばかり押問答《おしもんどう》をしていると、ひょっくり狸がやって来た。あとから聞いたら、小使が学校に騒動がありますって、わざわざ知らせに行ったのだそうだ。これしきの事に、校長を呼ぶなんて意気地がなさ過ぎる。それだから中学校の小使なんぞをしてるんだ。　校長はひと通りおれの説明を聞いた。生徒の言草《いいぐさ》もちょっと聞いた。追って処分するまでは、今まで通り学校へ出ろ。早く顔を洗って、朝飯を食わないと時間に間に合わないから、早くしろと云って寄宿生をみんな放免《ほうめん》した。手温《てぬ》るい事だ。おれなら即席《そくせき》に寄宿生をことごとく退校してしまう。こんな悠長《ゆうちょう》な事をするから生徒が宿直�
 �を馬鹿にするんだ。その上おれに向って、あなたもさぞご心配でお疲れでしょう、今日はご授業に及《およ》ばんと云うから、おれはこう答えた。「いえ、ちっとも心配じゃありません。こんな事が毎晩あっても、命のある間は心配にゃなりません。授業はやります、一晩ぐらい寝なくって、授業が出来ないくらいなら、頂戴《ちょうだい》した月給を学校の方へ割戻《わりもど》します」校長は何と思ったものか、しばらくおれの顔を見つめていたが、しかし顔が大分はれていますよと注意した。なるほど何だか少々重たい気がする。その上べた一面｜痒《かゆ》い。蚊がよっぽと刺《さ》したに相違ない。おれは顔中ぼりぼり掻《か》きながら、顔はいくら膨《は》れたって、口はたしかにきけますから、授業には差し支《つか》えま
 せんと答えた。校長は笑いながら、大分元気ですねと賞《ほ》めた。実を云うと賞めたんじゃあるまい、ひやかしたんだろう。　　　　　五　君｜釣《つ》りに行きませんかと赤シャツがおれに聞いた。赤シャツは気味の悪《わ》るいように優しい声を出す男である。まるで男だか女だか分《わか》りゃしない。男なら男らしい声を出すもんだ。ことに大学卒業生じゃないか。物理学校でさえおれくらいな声が出るのに、文学士がこれじゃ見っともない。　おれはそうですなあと少し進まない返事をしたら、君釣をした事がありますかと失敬な事を聞く。あんまりないが、子供の時、小梅《こうめ》の釣堀《つりぼり》で鮒《ふな》を三｜匹《びき》釣った事がある。それから神楽坂《かぐらざか》の毘沙門《びしゃもん》の縁日《えんにち
 》で八寸ばかりの鯉《こい》を針で引っかけて、しめたと思ったら、ぽちゃりと落としてしまったがこれは今考えても惜《お》しいと云《い》ったら、赤シャツは顋《あご》を前の方へ突《つ》き出してホホホホと笑った。何もそう気取って笑わなくっても、よさそうな者だ。「それじゃ、まだ釣りの味は分らんですな。お望みならちと伝授しましょう」とすこぶる得意である。だれがご伝授をうけるものか。一体釣や猟《りょう》をする連中はみんな不人情な人間ばかりだ。不人情でなくって、殺生《せっしょう》をして喜ぶ訳がない。魚だって、鳥だって殺されるより生きてる方が楽に極《き》まってる。釣や猟をしなくっちゃ活計《かっけい》がたたないなら格別だが、何不足なく暮《くら》している上に、生き物を殺さなくっちゃ
 寝られないなんて贅沢《ぜいたく》な話だ。こう思ったが向《むこ》うは文学士だけに口が達者だから、議論じゃ叶《かな》わないと思って、だまってた。すると先生このおれを降参させたと疳違《かんちが》いして、早速伝授しましょう。おひまなら、今日どうです、いっしょに行っちゃ。吉川《よしかわ》君と二人《ふたり》ぎりじゃ、淋《さむ》しいから、来たまえとしきりに勧める。吉川君というのは画学の教師で例の野だいこの事だ。この野だは、どういう了見《りょうけん》だか、赤シャツのうちへ朝夕｜出入《でいり》して、どこへでも随行《ずいこう》して行《ゆ》く。まるで同輩《どうはい》じゃない。主従《しゅうじゅう》みたようだ。赤シャツの行く所なら、野だは必ず行くに極《きま》っているんだから、今さら
 驚《おど》ろきもしないが、二人で行けば済むところを、なんで無愛想《ぶあいそ》のおれへ口を掛《か》けたんだろう。大方｜高慢《こうまん》ちきな釣道楽で、自分の釣るところをおれに見せびらかすつもりかなんかで誘《さそ》ったに違いない。そんな事で見せびらかされるおれじゃない。鮪《まぐろ》の二匹や三匹釣ったって、びくともするもんか。おれだって人間だ、いくら下手《へた》だって糸さえ卸《おろ》しゃ、何かかかるだろう、ここでおれが行かないと、赤シャツの事だから、下手だから行かないんだ、嫌《きら》いだから行かないんじゃないと邪推《じゃすい》するに相違《そうい》ない。おれはこう考えたから、行きましょうと答えた。それから、学校をしまって、一応うちへ帰って、支度《したく》を整えて、停
 車場で赤シャツと野だを待ち合せて浜《はま》へ行った。船頭は一人《ひとり》で、船《ふね》は細長い東京辺では見た事もない恰好《かっこう》である。さっきから船中｜見渡《みわた》すが釣竿《つりざお》が一本も見えない。釣竿なしで釣が出来るものか、どうする了見だろうと、野だに聞くと、沖釣《おきづり》には竿は用いません、糸だけでげすと顋を撫《な》でて黒人《くろうと》じみた事を云った。こう遣《や》り込《こ》められるくらいならだまっていればよかった。　船頭はゆっくりゆっくり漕《こ》いでいるが熟練は恐《おそろ》しいもので、見返《みか》えると、浜が小さく見えるくらいもう出ている。高柏寺《こうはくじ》の五重の塔《とう》が森の上へ抜《ぬ》け出して針のように尖《とん》がってる。向側《
 むこうがわ》を見ると青嶋《あおしま》が浮いている。これは人の住まない島だそうだ。よく見ると石と松《まつ》ばかりだ。なるほど石と松ばかりじゃ住めっこない。赤シャツは、しきりに眺望《ちょうぼう》していい景色だと云ってる。野だは絶景でげすと云ってる。絶景だか何だか知らないが、いい心持ちには相違ない。ひろびろとした海の上で、潮風に吹《ふ》かれるのは薬だと思った。いやに腹が減る。「あの松を見たまえ、幹が真直《まっすぐ》で、上が傘《かさ》のように開いてターナーの画にありそうだね」と赤シャツが野だに云うと、野だは「全くターナーですね。どうもあの曲り具合ったらありませんね。ターナーそっくりですよ」と心得顔である。ターナーとは何の事だか知らないが、聞かないでも困らない事だか
 ら黙《だま》っていた。舟は島を右に見てぐるりと廻《まわ》った。波は全くない。これで海だとは受け取りにくいほど平《たいら》だ。赤シャツのお陰《かげ》ではなはだ愉快《ゆかい》だ。出来る事なら、あの島の上へ上がってみたいと思ったから、あの岩のある所へは舟はつけられないんですかと聞いてみた。つけられん事もないですが、釣をするには、あまり岸じゃいけないですと赤シャツが異議を申し立てた。おれは黙ってた。すると野だがどうです教頭、これからあの島をターナー島と名づけようじゃありませんかと余計な発議《ほつぎ》をした。赤シャツはそいつは面白い、吾々《われわれ》はこれからそう云おうと賛成した。この吾々のうちにおれもはいってるなら迷惑《めいわく》だ。おれには青嶋でたくさんだ。あの�
 ��の上に、どうです、ラフハエルのマドンナを置いちゃ。いい画が出来ますぜと野だが云うと、マドンナの話はよそうじゃないかホホホホと赤シャツが気味の悪るい笑い方をした。なに誰も居ないから大丈夫《だいじょうぶ》ですと、ちょっとおれの方を見たが、わざと顔をそむけてにやにやと笑った。おれは何だかやな心持ちがした。マドンナだろうが、小旦那《こだんな》だろうが、おれの関係した事でないから、勝手に立たせるがよかろうが、人に分らない事を言って分らないから聞いたって構やしませんてえような風をする。下品な仕草だ。これで当人は私《わたし》も江戸《えど》っ子でげすなどと云ってる。マドンナと云うのは何でも赤シャツの馴染《なじみ》の芸者の渾名《あだな》か何かに違いないと思った。なじみの芸�
 �を無人島の松の木の下に立たして眺《なが》めていれば世話はない。それを野だが油絵にでもかいて展覧会へ出したらよかろう。　ここいらがいいだろうと船頭は船をとめて、錨《いかり》を卸した。幾尋《いくひろ》あるかねと赤シャツが聞くと、六尋《むひろ》ぐらいだと云う。六尋ぐらいじゃ鯛《たい》はむずかしいなと、赤シャツは糸を海へなげ込んだ。大将鯛を釣る気と見える、豪胆《ごうたん》なものだ。野だは、なに教頭のお手際じゃかかりますよ。それになぎですからとお世辞を云いながら、これも糸を繰《く》り出して投げ入れる。何だか先に錘《おもり》のような鉛《なまり》がぶら下がってるだけだ。浮《うき》がない。浮がなくって釣をするのは寒暖計なしで熱度をはかるようなものだ。おれには到底《とうてい
 》出来ないと見ていると、さあ君もやりたまえ糸はありますかと聞く。糸はあまるほどあるが、浮がありませんと云ったら、浮がなくっちゃ釣が出来ないのは素人《しろうと》ですよ。こうしてね、糸が水底《みずそこ》へついた時分に、船縁《ふなべり》の所で人指しゆびで呼吸をはかるんです、食うとすぐ手に答える。――そらきた、と先生急に糸をたぐり始めるから、何かかかったと思ったら何にもかからない、餌《え》がなくなってたばかりだ。いい気味《きび》だ。教頭、残念な事をしましたね、今のはたしかに大ものに違いなかったんですが、どうも教頭のお手際でさえ逃《に》げられちゃ、今日は油断ができませんよ。しかし逃げられても何ですね。浮と睨《にら》めくらをしている連中よりはましですね。ちょうど歯どめ�
 ��なくっちゃ自転車へ乗れないのと同程度ですからねと野だは妙《みよう》な事ばかり喋舌《しゃべ》る。よっぽど撲《なぐ》りつけてやろうかと思った。おれだって人間だ、教頭ひとりで借り切った海じゃあるまいし。広い所だ。鰹《かつお》の一匹ぐらい義理にだって、かかってくれるだろうと、どぼんと錘と糸を抛《ほう》り込んでいい加減に指の先であやつっていた。　しばらくすると、何だかぴくぴくと糸にあたるものがある。おれは考えた。こいつは魚に相違ない。生きてるものでなくっちゃ、こうぴくつく訳がない。しめた、釣れたとぐいぐい手繰《たぐ》り寄せた。おや釣れましたかね、後世｜恐《おそ》るべしだと野だがひやかすうち、糸はもう大概手繰り込んでただ五尺ばかりほどしか、水に浸《つ》いておらん。船�
 �から覗《のぞ》いてみたら、金魚のような縞《しま》のある魚が糸にくっついて、右左へ漾《ただよ》いながら、手に応じて浮き上がってくる。面白い。水際から上げるとき、ぽちゃりと跳《は》ねたから、おれの顔は潮水だらけになった。ようやくつらまえて、針をとろうとするがなかなか取れない。捕《つら》まえた手はぬるぬるする。大いに気味がわるい。面倒だから糸を振《ふ》って胴《どう》の間《ま》へ擲《たた》きつけたら、すぐ死んでしまった。赤シャツと野だは驚ろいて見ている。おれは海の中で手をざぶざぶと洗って、鼻の先へあてがってみた。まだ腥臭《なまぐさ》い。もう懲《こ》り懲《ご》りだ。何が釣れたって魚は握《にぎ》りたくない。魚も握られたくなかろう。そうそう糸を捲いてしまった。　一番槍《
 いちばんやり》はお手柄《てがら》だがゴルキじゃ、と野だがまた生意気を云うと、ゴルキと云うと露西亜《ロシア》の文学者みたような名だねと赤シャツが洒落《しゃれ》た。そうですね、まるで露西亜の文学者ですねと野だはすぐ賛成しやがる。ゴルキが露西亜の文学者で、丸木が芝《しば》の写真師で、米のなる木が命の親だろう。一体この赤シャツはわるい癖《くせ》だ。誰《だれ》を捕《つら》まえても片仮名の唐人《とうじん》の名を並べたがる。人にはそれぞれ専門があったものだ。おれのような数学の教師にゴルキだか車力《しゃりき》だか見当がつくものか、少しは遠慮《えんりょ》するがいい。云《い》うならフランクリンの自伝だとかプッシング、ツー、ゼ、フロントだとか、おれでも知ってる名を使うがいい。赤�
 ��ャツは時々帝国文学とかいう真赤《まっか》な雑誌を学校へ持って来て難有《ありがた》そうに読んでいる。山嵐《やまあらし》に聞いてみたら、赤シャツの片仮名はみんなあの雑誌から出るんだそうだ。帝国文学も罪な雑誌だ。　それから赤シャツと野だは一生懸命《いっしょうけんめい》に釣っていたが、約一時間ばかりのうちに二人《ふたり》で十五六上げた。可笑《おか》しい事に釣れるのも、釣れるのも、みんなゴルキばかりだ。鯛なんて薬にしたくってもありゃしない。今日は露西亜文学の大当りだと赤シャツが野だに話している。あなたの手腕《しゅわん》でゴルキなんですから、私《わたし》なんぞがゴルキなのは仕方がありません。当り前ですなと野だが答えている。船頭に聞くとこの小魚は骨が多くって、まずくっ�
 �、とても食えないんだそうだ。ただ肥料《こやし》には出来るそうだ。赤シャツと野だは一生懸命に肥料を釣っているんだ。気の毒の至りだ。おれは一｜匹《ぴき》で懲《こ》りたから、胴の間へ仰向《あおむ》けになって、さっきから大空を眺めていた。釣をするよりこの方がよっぽど洒落《しゃれ》ている。　すると二人は小声で何か話し始めた。おれにはよく聞《きこ》えない、また聞きたくもない。おれは空を見ながら清《きよ》の事を考えている。金があって、清をつれて、こんな奇麗《きれい》な所へ遊びに来たらさぞ愉快だろう。いくら景色がよくっても野だなどといっしょじゃつまらない。清は皺苦茶《しわくちゃ》だらけの婆さんだが、どんな所へ連れて出たって恥《は》ずかしい心持ちはしない。野だのようなのは、
 馬車に乗ろうが、船に乗ろうが、凌雲閣《りょううんかく》へのろうが、到底寄り付けたものじゃない。おれが教頭で、赤シャツがおれだったら、やっぱりおれにへけつけお世辞を使って赤シャツを冷《ひや》かすに違いない。江戸っ子は軽薄《けいはく》だと云うがなるほどこんなものが田舎巡《いなかまわ》りをして、私《わたし》は江戸っ子でげすと繰り返していたら、軽薄は江戸っ子で、江戸っ子は軽薄の事だと田舎者が思うに極まってる。こんな事を考えていると、何だか二人がくすくす笑い出した。笑い声の間に何か云うが途切《とぎ》れ途切れでとんと要領を得ない。「え？　どうだか……」「……全くです……知らないんですから……罪ですね」「まさか……」「バッタを……本当ですよ」　おれは外の言葉には耳を傾《�
 ��たむ》けなかったが、バッタと云う野だの語《ことば》を聴《き》いた時は、思わずきっとなった。野だは何のためかバッタと云う言葉だけことさら力を入れて、明瞭《めいりょう》におれの耳にはいるようにして、そのあとをわざとぼかしてしまった。おれは動かないでやはり聞いていた。「また例の堀田《ほった》が……」「そうかも知れない……」「天麩羅《てんぷら》……ハハハハハ」「……煽動《せんどう》して……」「団子《だんご》も？」　言葉はかように途切れ途切れであるけれども、バッタだの天麩羅だの、団子だのというところをもって推し測ってみると、何でもおれのことについて内所話《ないしょばな》しをしているに相違ない。話すならもっと大きな声で話すがいい、また内所話をするくらいなら、おれなん�
 �誘わなければいい。いけ好かない連中だ。バッタだろうが雪踏《せった》だろうが、非はおれにある事じゃない。校長がひとまずあずけろと云ったから、狸《たぬき》の顔にめんじてただ今のところは控《ひか》えているんだ。野だの癖に入らぬ批評をしやがる。毛筆《けふで》でもしゃぶって引っ込んでるがいい。おれの事は、遅《おそ》かれ早かれ、おれ一人で片付けてみせるから、差支《さしつか》えはないが、また例の堀田が［＃「また例の堀田が」に傍点］とか煽動して［＃「煽動して」に傍点］とか云う文句が気にかかる。堀田がおれを煽動して騒動《そうどう》を大きくしたと云う意味なのか、あるいは堀田が生徒を煽動しておれをいじめたと云うのか方角がわからない。青空を見ていると、日の光がだんだん弱って来て、
 少しはひやりとする風が吹き出した。線香《せんこう》の烟《けむり》のような雲が、透《す》き徹《とお》る底の上を静かに伸《の》して行ったと思ったら、いつしか底の奥《おく》に流れ込んで、うすくもやを掛《か》けたようになった。　もう帰ろうかと赤シャツが思い出したように云うと、ええちょうど時分ですね。今夜はマドンナの君にお逢《あ》いですかと野だが云う。赤シャツは馬鹿《ばか》あ云っちゃいけない、間違いになると、船縁に身を倚《も》たした奴《やつ》を、少し起き直る。エヘヘヘヘ大丈夫ですよ。聞いたって……と野だが振り返った時、おれは皿《さら》のような眼《め》を野だの頭の上へまともに浴びせ掛けてやった。野だはまぼしそうに引っ繰り返って、や、こいつは降参だと首を縮めて、頭を掻《�
 ��》いた。何という猪口才《ちょこざい》だろう。　船は静かな海を岸へ漕《こ》ぎ戻《もど》る。君｜釣《つり》はあまり好きでないと見えますねと赤シャツが聞くから、ええ寝《ね》ていて空を見る方がいいですと答えて、吸いかけた巻烟草《まきたばこ》を海の中へたたき込んだら、ジュと音がして艪《ろ》の足で掻き分けられた浪《なみ》の上を揺《ゆ》られながら漾《ただよ》っていった。「君が来たんで生徒も大いに喜んでいるから、奮発《ふんぱつ》してやってくれたまえ」と今度は釣にはまるで縁故《えんこ》もない事を云い出した。「あんまり喜んでもいないでしょう」「いえ、お世辞じゃない。全く喜んでいるんです、ね、吉川君」「喜んでるどころじゃない。大騒《おおさわ》ぎです」と野だはにやにやと笑った。�
 �いつの云う事は一々｜癪《しゃく》に障《さわ》るから妙だ。「しかし君注意しないと、険呑《けんのん》ですよ」と赤シャツが云うから「どうせ険呑です。こうなりゃ険呑は覚悟《かくご》です」と云ってやった。実際おれは免職《めんしょく》になるか、寄宿生をことごとくあやまらせるか、どっちか一つにする了見でいた。「そう云っちゃ、取りつきどころもないが――実は僕も教頭として君のためを思うから云うんだが、わるく取っちゃ困る」「教頭は全く君に好意を持ってるんですよ。僕も及《およ》ばずながら、同じ江戸っ子だから、なるべく長くご在校を願って、お互《たがい》に力になろうと思って、これでも蔭ながら尽力《じんりょく》しているんですよ」と野だが人間｜並《なみ》の事を云った。野だのお世話になる
 くらいなら首を縊《くく》って死んじまわあ。「それでね、生徒は君の来たのを大変｜歓迎《かんげい》しているんだが、そこにはいろいろな事情があってね。君も腹の立つ事もあるだろうが、ここが我慢《がまん》だと思って、辛防《しんぼう》してくれたまえ。決して君のためにならないような事はしないから」「いろいろの事情た、どんな事情です」「それが少し込み入ってるんだが、まあだんだん分りますよ。僕《ぼく》が話さないでも自然と分って来るです、ね吉川君」「ええなかなか込み入ってますからね。一朝一夕にゃ到底分りません。しかしだんだん分ります、僕が話さないでも自然と分って来るです」と野だは赤シャツと同じような事を云う。「そんな面倒《めんどう》な事情なら聞かなくてもいいんですが、あなたの�
 ��から話し出したから伺《うかが》うんです」「そりゃごもっともだ。こっちで口を切って、あとをつけないのは無責任ですね。それじゃこれだけの事を云っておきましょう。あなたは失礼ながら、まだ学校を卒業したてで、教師は始めての、経験である。ところが学校というものはなかなか情実のあるもので、そう書生流に淡泊《たんぱく》には行《ゆ》かないですからね」「淡泊に行かなければ、どんな風に行くんです」「さあ君はそう率直だから、まだ経験に乏《とぼ》しいと云うんですがね……」「どうせ経験には乏しいはずです。履歴書《りれきしょ》にもかいときましたが二十三年四ヶ月ですから」「さ、そこで思わぬ辺から乗ぜられる事があるんです」「正直にしていれば誰《だれ》が乗じたって怖《こわ》くはないです」�
 �無論怖くはない、怖くはないが、乗ぜられる。現に君の前任者がやられたんだから、気を付けないといけないと云うんです」　野だが大人《おとな》しくなったなと気が付いて、ふり向いて見ると、いつしか艫《とも》の方で船頭と釣の話をしている。野だが居ないんでよっぽど話しよくなった。「僕の前任者が、誰《だ》れに乗ぜられたんです」「だれと指すと、その人の名誉に関係するから云えない。また判然と証拠《しょうこ》のない事だから云うとこっちの落度になる。とにかく、せっかく君が来たもんだから、ここで失敗しちゃ僕等《ぼくら》も君を呼んだ甲斐《かい》がない。どうか気を付けてくれたまえ」「気を付けろったって、これより気の付けようはありません。わるい事をしなけりゃ好《い》いんでしょう」　赤シャ
 ツはホホホホと笑った。別段おれは笑われるような事を云った覚えはない。今日《こんにち》ただ今に至るまでこれでいいと堅《かた》く信じている。考えてみると世間の大部分の人はわるくなる事を奨励《しょうれい》しているように思う。わるくならなければ社会に成功はしないものと信じているらしい。たまに正直な純粋《じゅんすい》な人を見ると、坊《ぼ》っちゃんだの小僧《こぞう》だのと難癖《なんくせ》をつけて軽蔑《けいべつ》する。それじゃ小学校や中学校で嘘《うそ》をつくな、正直にしろと倫理《りんり》の先生が教えない方がいい。いっそ思い切って学校で嘘をつく法とか、人を信じない術とか、人を乗せる策を教授する方が、世のためにも当人のためにもなるだろう。赤シャツがホホホホと笑ったのは、おれ�
 ��単純なのを笑ったのだ。単純や真率が笑われる世の中じゃ仕様がない。清はこんな時に決して笑った事はない。大いに感心して聞いたもんだ。清の方が赤シャツよりよっぽど上等だ。「無論｜悪《わ》るい事をしなければ好いんですが、自分だけ悪るい事をしなくっても、人の悪るいのが分らなくっちゃ、やっぱりひどい目に逢うでしょう。世の中には磊落《らいらく》なように見えても、淡泊なように見えても、親切に下宿の世話なんかしてくれても、めったに油断の出来ないのがありますから……。大分寒くなった。もう秋ですね、浜の方は靄《もや》でセピヤ色になった。いい景色だ。おい、吉川君どうだい、あの浜の景色は……」と大きな声を出して野だを呼んだ。なあるほどこりゃ奇絶《きぜつ》ですね。時間があると写生す�
 �んだが、惜《お》しいですね、このままにしておくのはと野だは大いにたたく。　港屋の二階に灯が一つついて、汽車の笛《ふえ》がヒューと鳴るとき、おれの乗っていた舟は磯《いそ》の砂へざぐりと、舳《へさき》をつき込んで動かなくなった。お早うお帰りと、かみさんが、浜に立って赤シャツに挨拶《あいさつ》する。おれは船端《ふなばた》から、やっと掛声《かけごえ》をして磯へ飛び下りた。　　　　　六　野だは大嫌《だいきら》いだ。こんな奴《やつ》は沢庵石《たくあんいし》をつけて海の底へ沈《しず》めちまう方が日本のためだ。赤シャツは声が気に食わない。あれは持前の声をわざと気取ってあんな優しいように見せてるんだろう。いくら気取ったって、あの面じゃ駄目《だめ》だ。惚《ほ》れるものがあった
 ってマドンナぐらいなものだ。しかし教頭だけに野だよりむずかしい事を云《い》う。うちへ帰って、あいつの申し条を考えてみると一応もっとものようでもある。はっきりとした事は云わないから、見当がつきかねるが、何でも山嵐《やまあらし》がよくない奴だから用心しろと云うのらしい。それならそうとはっきり断言するがいい、男らしくもない。そうして、そんな悪《わ》るい教師なら、早く免職《めんしょく》さしたらよかろう。教頭なんて文学士の癖《くせ》に意気地《いくじ》のないもんだ。蔭口《かげぐち》をきくのでさえ、公然と名前が云えないくらいな男だから、弱虫に極《き》まってる。弱虫は親切なものだから、あの赤シャツも女のような親切ものなんだろう。親切は親切、声は声だから、声が気に入らないっ�
 ��、親切を無にしちゃ筋が違《ちが》う。それにしても世の中は不思議なものだ、虫の好かない奴が親切で、気のあった友達が悪漢《わるもの》だなんて、人を馬鹿《ばか》にしている。大方｜田舎《いなか》だから万事東京のさかに行くんだろう。物騒《ぶっそう》な所だ。今に火事が氷って、石が豆腐《とうふ》になるかも知れない。しかし、あの山嵐が生徒を煽動するなんて、いたずらをしそうもないがな。一番人望のある教師だと云うから、やろうと思ったら大抵《たいてい》の事は出来るかも知れないが、――第一そんな廻《まわ》りくどい事をしないでも、じかにおれを捕《つら》まえて喧嘩《けんか》を吹き懸《か》けりゃ手数が省ける訳だ。おれが邪魔《じゃま》になるなら、実はこれこれだ、邪魔だから辞職してくれと�
 �や、よさそうなもんだ。物は相談ずくでどうでもなる。向《むこ》うの云い条がもっともなら、明日にでも辞職してやる。ここばかり米が出来る訳でもあるまい。どこの果《はて》へ行ったって、のたれ死《じに》はしないつもりだ。山嵐もよっぽど話せない奴だな。　ここへ来た時第一番に氷水を奢《おご》ったのは山嵐だ。そんな裏表のある奴から、氷水でも奢ってもらっちゃ、おれの顔に関わる。おれはたった一｜杯《ぱい》しか飲まなかったから一銭五｜厘《りん》しか払《はら》わしちゃない。しかし一銭だろうが五厘だろうが、詐欺師《さぎし》の恩になっては、死ぬまで心持ちがよくない。あした学校へ行ったら、一銭五厘返しておこう。おれは清《きよ》から三円借りている。その三円は五年｜経《た》った今日までまだ
 返さない。返せないんじゃない。返さないんだ。清は今に返すだろうなどと、かりそめにもおれの懐中《かいちゅう》をあてにしてはいない。おれも今に返そうなどと他人がましい義理立てはしないつもりだ。こっちがこんな心配をすればするほど清の心を疑ぐるようなもので、清の美しい心にけちを付けると同じ事になる。返さないのは清を踏《ふ》みつけるのじゃない、清をおれの片破《かたわ》れと思うからだ。清と山嵐とはもとより比べ物にならないが、たとい氷水だろうが、甘茶《あまちゃ》だろうが、他人から恵《めぐみ》を受けて、だまっているのは向うをひとかどの人間と見立てて、その人間に対する厚意の所作だ。割前を出せばそれだけの事で済むところを、心のうちで難有《ありがた》いと恩に着るのは銭金で買える�
 ��礼じゃない。無位無冠でも一人前の独立した人間だ。独立した人間が頭を下げるのは百万両より尊《たっ》といお礼と思わなければならない。　おれはこれでも山嵐に一銭五厘｜奮発《ふんぱつ》させて、百万両より尊とい返礼をした気でいる。山嵐は難有《ありがた》いと思ってしかるべきだ。それに裏へ廻って卑劣《ひれつ》な振舞《ふるまい》をするとは怪《け》しからん野郎《やろう》だ。あした行って一銭五厘返してしまえば借りも貸しもない。そうしておいて喧嘩をしてやろう。　おれはここまで考えたら、眠《ねむ》くなったからぐうぐう寝《ね》てしまった。あくる日は思う仔細《しさい》があるから、例刻より早ヤ目に出校して山嵐を待ち受けた。ところがなかなか出て来ない。うらなりが出て来る。漢学の先生が出�
 �来る。野だが出て来る。しまいには赤シャツまで出て来たが山嵐の机の上は白墨《はくぼく》が一本｜竪《たて》に寝ているだけで閑静《かんせい》なものだ。おれは、控所《ひかえじょ》へはいるや否や返そうと思って、うちを出る時から、湯銭のように手の平へ入れて一銭五厘、学校まで握《にぎ》って来た。おれは膏《あぶら》っ手だから、開けてみると一銭五厘が汗《あせ》をかいている。汗をかいてる銭を返しちゃ、山嵐が何とか云うだろうと思ったから、机の上へ置いてふうふう吹いてまた握った。ところへ赤シャツが来て昨日は失敬、迷惑《めいわく》でしたろうと云ったから、迷惑じゃありません、お蔭で腹が減りましたと答えた。すると赤シャツは山嵐の机の上へ肱《ひじ》を突《つ》いて、あの盤台面《ばんだいづら
 》をおれの鼻の側面へ持って来たから、何をするかと思ったら、君昨日返りがけに船の中で話した事は、秘密にしてくれたまえ。まだ誰《だれ》にも話しやしますまいねと云った。女のような声を出すだけに心配性な男と見える。話さない事はたしかである。しかしこれから話そうと云う心持ちで、すでに一銭五厘手の平に用意しているくらいだから、ここで赤シャツから口留めをされちゃ、ちと困る。赤シャツも赤シャツだ。山嵐と名を指さないにしろ、あれほど推察の出来る謎《なぞ》をかけておきながら、今さらその謎を解いちゃ迷惑だとは教頭とも思えぬ無責任だ。元来ならおれが山嵐と戦争をはじめて鎬《しのぎ》を削《けず》ってる真中《まんなか》へ出て堂々とおれの肩《かた》を持つべきだ。それでこそ一校の教頭で、赤�
 ��ャツを着ている主意も立つというもんだ。　おれは教頭に向《むか》って、まだ誰にも話さないが、これから山嵐と談判するつもりだと云ったら、赤シャツは大いに狼狽《ろうばい》して、君そんな無法な事をしちゃ困る。僕《ぼく》は堀田《ほった》君の事について、別段君に何も明言した覚えはないんだから――君がもしここで乱暴を働いてくれると、僕は非常に迷惑する。君は学校に騒動《そうどう》を起すつもりで来たんじゃなかろうと妙《みょう》に常識をはずれた質問をするから、当《あた》り前《まえ》です、月給をもらったり、騒動を起したりしちゃ、学校の方でも困るでしょうと云った。すると赤シャツはそれじゃ昨日の事は君の参考だけにとめて、口外してくれるなと汗をかいて依頼《いらい》に及《およ》ぶから�
 �よろしい、僕も困るんだが、そんなにあなたが迷惑ならよしましょうと受け合った。君｜大丈夫《だいじょうぶ》かいと赤シャツは念を押《お》した。どこまで女らしいんだか奥行《おくゆき》がわからない。文学士なんて、みんなあんな連中ならつまらんものだ。辻褄《つじつま》の合わない、論理に欠けた注文をして恬然《てんぜん》としている。しかもこのおれを疑ぐってる。憚《はばか》りながら男だ。受け合った事を裏へ廻って反古《ほご》にするようなさもしい了見《りょうけん》はもってるもんか。　ところへ両隣《りょうどな》りの机の所有主も出校したんで、赤シャツは早々自分の席へ帰って行った。赤シャツは歩《あ》るき方から気取ってる。部屋の中を往来するのでも、音を立てないように靴《くつ》の底をそっと
 落《おと》す。音を立てないであるくのが自慢《じまん》になるもんだとは、この時から始めて知った。泥棒《どろぼう》の稽古《けいこ》じゃあるまいし、当り前にするがいい。やがて始業の喇叭《らっぱ》がなった。山嵐はとうとう出て来ない。仕方がないから、一銭五厘を机の上へ置いて教場へ出掛《でか》けた。　授業の都合《つごう》で一時間目は少し後《おく》れて、控所へ帰ったら、ほかの教師はみんな机を控えて話をしている。山嵐もいつの間にか来ている。欠勤だと思ったら遅刻《ちこく》したんだ。おれの顔を見るや否や今日は君のお蔭で遅刻したんだ。罰金《ばっきん》を出したまえと云った。おれは机の上にあった一銭五厘を出して、これをやるから取っておけ。先達《せんだっ》て通町《とおりちょう》で飲ん�
 ��氷水の代だと山嵐の前へ置くと、何を云ってるんだと笑いかけたが、おれが存外｜真面目《まじめ》でいるので、つまらない冗談《じょうだん》をするなと銭をおれの机の上に掃《は》き返した。おや山嵐の癖《くせ》にどこまでも奢る気だな。「冗談じゃない本当だ。おれは君に氷水を奢られる因縁《いんえん》がないから、出すんだ。取らない法があるか」「そんなに一銭五厘が気になるなら取ってもいいが、なぜ思い出したように、今時分返すんだ」「今時分でも、いつ時分でも、返すんだ。奢られるのが、いやだから返すんだ」　山嵐は冷然とおれの顔を見てふんと云った。赤シャツの依頼がなければ、ここで山嵐の卑劣《ひれつ》をあばいて大喧嘩をしてやるんだが、口外しないと受け合ったんだから動きがとれない。人がこ�
 �なに真赤《まっか》になってるのにふんという理窟《りくつ》があるものか。「氷水の代は受け取るから、下宿は出てくれ」「一銭五厘受け取ればそれでいい。下宿を出ようが出まいがおれの勝手だ」「ところが勝手でない、昨日、あすこの亭主《ていしゅ》が来て君に出てもらいたいと云うから、その訳を聞いたら亭主の云うのはもっともだ。それでももう一応たしかめるつもりで今朝《けさ》あすこへ寄って詳《くわ》しい話を聞いてきたんだ」　おれには山嵐の云う事が何の意味だか分らない。「亭主が君に何を話したんだか、おれが知ってるもんか。そう自分だけで極めたって仕様があるか。訳があるなら、訳を話すが順だ。てんから亭主の云う方がもっともだなんて失敬千万な事を云うな」「うん、そんなら云ってやろう。君は
 乱暴であの下宿で持て余《あ》まされているんだ。いくら下宿の女房だって、下女たあ違うぜ。足を出して拭《ふ》かせるなんて、威張《いば》り過ぎるさ」「おれが、いつ下宿の女房に足を拭かせた」「拭かせたかどうだか知らないが、とにかく向うじゃ、君に困ってるんだ。下宿料の十円や十五円は懸物《かけもの》を一｜幅《ぷく》売りゃ、すぐ浮《う》いてくるって云ってたぜ」「利いた風な事をぬかす野郎《やろう》だ。そんなら、なぜ置いた」「なぜ置いたか、僕は知らん、置くことは置いたんだが、いやになったんだから、出ろと云うんだろう。君出てやれ」「当り前だ。居てくれと手を合せたって、居るものか。一体そんな云い懸《がか》りを云うような所へ周旋《しゅうせん》する君からしてが不埒《ふらち》だ」「お�
 ��が不埒か、君が大人《おとな》しくないんだか、どっちかだろう」　山嵐もおれに劣《おと》らぬ肝癪持《かんしゃくも》ちだから、負け嫌《ぎら》いな大きな声を出す。控所に居た連中は何事が始まったかと思って、みんな、おれと山嵐の方を見て、顋《あご》を長くしてぼんやりしている。おれは、別に恥《は》ずかしい事をした覚えはないんだから、立ち上がりながら、部屋中一通り見巡《みま》わしてやった。みんなが驚《おど》ろいてるなかに野だだけは面白そうに笑っていた。おれの大きな眼《め》が、貴様も喧嘩をするつもりかと云う権幕で、野だの干瓢《かんぴょう》づらを射貫《いぬ》いた時に、野だは突然《とつぜん》真面目な顔をして、大いにつつしんだ。少し怖《こ》わかったと見える。そのうち喇叭が鳴る。�
 �嵐もおれも喧嘩を中止して教場へ出た。　午後は、先夜おれに対して無礼を働いた寄宿生の処分法についての会議だ。会議というものは生れて始めてだからとんと容子《ようす》が分らないが、職員が寄って、たかって自分勝手な説をたてて、それを校長が好い加減に纏《まと》めるのだろう。纏めるというのは黒白《こくびゃく》の決しかねる事柄《ことがら》について云うべき言葉だ。この場合のような、誰が見たって、不都合としか思われない事件に会議をするのは暇潰《ひまつぶ》しだ。誰が何と解釈したって異説の出ようはずがない。こんな明白なのは即座《そくざ》に校長が処分してしまえばいいに。随分《ずいぶん》決断のない事だ。校長ってものが、これならば、何の事はない、煮《に》え切《き》らない愚図《ぐず》の
 異名だ。　会議室は校長室の隣《とな》りにある細長い部屋で、平常は食堂の代理を勤める。黒い皮で張った椅子《いす》が二十｜脚《きゃく》ばかり、長いテーブルの周囲に並《なら》んでちょっと神田の西洋料理屋ぐらいな格だ。そのテーブルの端《はじ》に校長が坐《すわ》って、校長の隣りに赤シャツが構える。あとは勝手次第に席に着くんだそうだが、体操《たいそう》の教師だけはいつも席末に謙遜《けんそん》するという話だ。おれは様子が分らないから、博物の教師と漢学の教師の間へはいり込《こ》んだ。向うを見ると山嵐と野だが並んでる。野だの顔はどう考えても劣等だ。喧嘩はしても山嵐の方が遥《はる》かに趣《おもむき》がある。おやじの葬式《そうしき》の時に小日向《こびなた》の養源寺《ようげんじ》�
 ��座敷《ざしき》にかかってた懸物はこの顔によく似ている。坊主《ぼうず》に聞いてみたら韋駄天《いだてん》と云う怪物だそうだ。今日は怒《おこ》ってるから、眼をぐるぐる廻しちゃ、時々おれの方を見る。そんな事で威嚇《おど》かされてたまるもんかと、おれも負けない気で、やっぱり眼をぐりつかせて、山嵐をにらめてやった。おれの眼は恰好《かっこう》はよくないが、大きい事においては大抵な人には負けない。あなたは眼が大きいから役者になるときっと似合いますと清がよく云ったくらいだ。　もう大抵お揃《そろ》いでしょうかと校長が云うと、書記の川村と云うのが一つ二つと頭数を勘定《かんじょう》してみる。一人足りない。一人不足ですがと考えていたが、これは足りないはずだ。唐茄子《とうなす》のう�
 �なり君が来ていない。おれとうらなり君とはどう云う宿世《すくせ》の因縁かしらないが、この人の顔を見て以来どうしても忘れられない。控所へくれば、すぐ、うらなり君が眼に付く、途中《とちゅう》をあるいていても、うらなり先生の様子が心に浮《うか》ぶ。温泉へ行くと、うらなり君が時々｜蒼《あお》い顔をして湯壺《ゆつぼ》のなかに膨《ふく》れている。挨拶《あいさつ》をするとへえと恐縮《きょうしゅく》して頭を下げるから気の毒になる。学校へ出てうらなり君ほど大人しい人は居ない。めったに笑った事もないが、余計な口をきいた事もない。おれは君子という言葉を書物の上で知ってるが、これは字引にあるばかりで、生きてるものではないと思ってたが、うらなり君に逢《あ》ってから始めて、やっぱり正体
 のある文字だと感心したくらいだ。　このくらい関係の深い人の事だから、会議室へはいるや否や、うらなり君の居ないのは、すぐ気がついた。実を云うと、この男の次へでも坐《す》わろうかと、ひそかに目標《めじるし》にして来たくらいだ。校長はもうやがて見えるでしょうと、自分の前にある紫《むらさき》の袱紗包《ふくさづつみ》をほどいて、蒟蒻版《こんにゃくばん》のような者を読んでいる。赤シャツは琥珀《こはく》のパイプを絹ハンケチで磨《みが》き始めた。この男はこれが道楽である。赤シャツ相当のところだろう。ほかの連中は隣り同志で何だか私語《ささや》き合っている。手持無沙汰《てもちぶさた》なのは鉛筆《えんぴつ》の尻《しり》に着いている、護謨《ゴム》の頭でテーブルの上へしきりに何か書�
 ��ている。野だは時々山嵐に話しかけるが、山嵐は一向応じない。ただうん［＃「うん」に傍点］とかああ［＃「ああ」に傍点］と云うばかりで、時々｜怖《こわ》い眼をして、おれの方を見る。おれも負けずに睨《にら》め返す。　ところへ待ちかねた、うらなり君が気の毒そうにはいって来て少々用事がありまして、遅刻｜致《いた》しましたと慇懃《いんぎん》に狸《たぬき》に挨拶《あいさつ》をした。では会議を開きますと狸はまず書記の川村君に蒟蒻版を配布させる。見ると最初が処分の件、次が生徒｜取締《とりしまり》の件、その他二三ヶ条である。狸は例の通りもったいぶって、教育の生霊《いきりょう》という見えでこんな意味の事を述べた。「学校の職員や生徒に過失のあるのは、みんな自分の寡徳《かとく》の致�
 �ところで、何か事件がある度に、自分はよくこれで校長が勤まるとひそかに慚愧《ざんき》の念に堪《た》えんが、不幸にして今回もまたかかる騒動を引き起したのは、深く諸君に向って謝罪しなければならん。しかしひとたび起った以上は仕方がない、どうにか処分をせんければならん、事実はすでに諸君のご承知の通りであるからして、善後策について腹蔵のない事を参考のためにお述べ下さい」　おれは校長の言葉を聞いて、なるほど校長だの狸だのと云うものは、えらい事を云うもんだと感心した。こう校長が何もかも責任を受けて、自分の咎《とが》だとか、不徳だとか云うくらいなら、生徒を処分するのは、やめにして、自分から先へ免職《めんしょく》になったら、よさそうなもんだ。そうすればこんな面倒《めんどう》な
 会議なんぞを開く必要もなくなる訳だ。第一常識から云《い》っても分ってる。おれが大人しく宿直をする。生徒が乱暴をする。わるいのは校長でもなけりゃ、おれでもない、生徒だけに極《きま》ってる。もし山嵐が煽動《せんどう》したとすれば、生徒と山嵐を退治《たいじ》ればそれでたくさんだ。人の尻《しり》を自分で背負《しょ》い込《こ》んで、おれの尻だ、おれの尻だと吹き散らかす奴が、どこの国にあるもんか、狸でなくっちゃ出来る芸当じゃない。彼《かれ》はこんな条理《じょうり》に適《かな》わない議論を吐《は》いて、得意気に一同を見廻した。ところが誰も口を開くものがない。博物の教師は第一教場の屋根に烏《からす》がとまってるのを眺《なが》めている。漢学の先生は蒟蒻版《こんにゃくばん》を�
 ��《たた》んだり、延ばしたりしてる。山嵐はまだおれの顔をにらめている。会議と云うものが、こんな馬鹿気《ばかげ》たものなら、欠席して昼寝でもしている方がましだ。　おれは、じれったくなったから、一番大いに弁じてやろうと思って、半分尻をあげかけたら、赤シャツが何か云い出したから、やめにした。見るとパイプをしまって、縞《しま》のある絹ハンケチで顔をふきながら、何か云っている。あの手巾《はんけち》はきっとマドンナから巻き上げたに相違《そうい》ない。男は白い麻《あさ》を使うもんだ。「私も寄宿生の乱暴を聞いてはなはだ教頭として不行届《ふゆきとどき》であり、かつ平常の徳化が少年に及ばなかったのを深く慚《は》ずるのであります。でこう云う事は、何か陥欠《かんけつ》があると起る�
 �ので、事件その物を見ると何だか生徒だけがわるいようであるが、その真相を極めると責任はかえって学校にあるかも知れない。だから表面上にあらわれたところだけで厳重な制裁を加えるのは、かえって未来のためによくないかとも思われます。かつ少年血気のものであるから活気があふれて、善悪の考えはなく、半ば無意識にこんな悪戯《いたずら》をやる事はないとも限らん。でもとより処分法は校長のお考えにある事だから、私の容喙《ようかい》する限りではないが、どうかその辺をご斟酌《しんしゃく》になって、なるべく寛大なお取計《とりはからい》を願いたいと思います」　なるほど狸が狸なら、赤シャツも赤シャツだ。生徒があばれるのは、生徒がわるいんじゃない教師が悪るいんだと公言している。気狂《きちがい
 》が人の頭を撲《なぐ》り付けるのは、なぐられた人がわるいから、気狂がなぐるんだそうだ。難有《ありがた》い仕合せだ。活気にみちて困るなら運動場へ出て相撲《すもう》でも取るがいい、半ば無意識に床の中へバッタを入れられてたまるものか。この様子じゃ寝頸《ねくび》をかかれても、半ば無意識だって放免するつもりだろう。　おれはこう考えて何か云おうかなと考えてみたが、云うなら人を驚ろすかように滔々《とうとう》と述べたてなくっちゃつまらない、おれの癖として、腹が立ったときに口をきくと、二言か三言で必ず行き塞《つま》ってしまう。狸でも赤シャツでも人物から云うと、おれよりも下等だが、弁舌はなかなか達者だから、まずい事を喋舌《しゃべ》って揚足《あげあし》を取られちゃ面白くない。ち�
 ��っと腹案を作ってみようと、胸のなかで文章を作ってる。すると前に居た野だが突然起立したには驚ろいた。野だの癖に意見を述べるなんて生意気だ。野だは例のへらへら調で「実に今回のバッタ事件及び咄喊《とっかん》事件は吾々《われわれ》心ある職員をして、ひそかに吾《わが》校将来の前途《ぜんと》に危惧《きぐ》の念を抱《いだ》かしむるに足る珍事《ちんじ》でありまして、吾々職員たるものはこの際｜奮《ふる》って自ら省りみて、全校の風紀を振粛《しんしゅく》しなければなりません。それでただ今校長及び教頭のお述べになったお説は、実に肯綮《こうけい》に中《あた》った剴切《がいせつ》なお考えで私は徹頭徹尾《てっとうてつび》賛成致します。どうかなるべく寛大《かんだい》のご処分を仰《あお》�
 �たいと思います」と云った。野だの云う事は言語はあるが意味がない、漢語をのべつに陳列《ちんれつ》するぎりで訳が分らない。分ったのは徹頭徹尾賛成致しますと云う言葉だけだ。　おれは野だの云う意味は分らないけれども、何だか非常に腹が立ったから、腹案も出来ないうちに起《た》ち上がってしまった。「私は徹頭徹尾反対です……」と云ったがあとが急に出て来ない。「……そんな頓珍漢《とんちんかん》な、処分は大嫌《だいきら》いです」とつけたら、職員が一同笑い出した。「一体生徒が全然｜悪《わ》るいです。どうしても詫《あや》まらせなくっちゃ、癖になります。退校さしても構いません。……何だ失敬な、新しく来た教師だと思って……」と云って着席した。すると右隣りに居る博物が「生徒がわるい事も
 、わるいが、あまり厳重な罰などをするとかえって反動を起していけないでしょう。やっぱり教頭のおっしゃる通り、寛な方に賛成します」と弱い事を云った。左隣の漢学は穏便説《おんびんせつ》に賛成と云った。歴史も教頭と同説だと云った。忌々《いまいま》しい、大抵のものは赤シャツ党だ。こんな連中が寄り合って学校を立てていりゃ世話はない。おれは生徒をあやまらせるか、辞職するか二つのうち一つに極めてるんだから、もし赤シャツが勝ちを制したら、早速うちへ帰って荷作りをする覚悟《かくご》でいた。どうせ、こんな手合《てあい》を弁口《べんこう》で屈伏《くっぷく》させる手際はなし、させたところでいつまでご交際を願うのは、こっちでご免だ。学校に居ないとすればどうなったって構うもんか。また何�
 ��云うと笑うに違いない。だれが云うもんかと澄《すま》していた。　すると今までだまって聞いていた山嵐が奮然として、起ち上がった。野郎また赤シャツ賛成の意を表するな、どうせ、貴様とは喧嘩だ、勝手にしろと見ていると山嵐は硝子《ガラス》窓を振《ふる》わせるような声で「私《わたくし》は教頭及びその他諸君のお説には全然不同意であります。というものはこの事件はどの点から見ても、五十名の寄宿生が新来の教師｜某氏《ぼうし》を軽侮《けいぶ》してこれを翻弄《ほんろう》しようとした所為《しょい》とより外《ほか》には認められんのであります。教頭はその源因を教師の人物いかんにお求めになるようでありますが失礼ながらそれは失言かと思います。某氏が宿直にあたられたのは着後早々の事で、まだ生�
 �に接せられてから二十日に満たぬ頃《ころ》であります。この短かい二十日間において生徒は君の学問人物を評価し得る余地がないのであります。軽侮されべき至当な理由があって、軽侮を受けたのなら生徒の行為に斟酌《しんしゃく》を加える理由もありましょうが、何らの源因もないのに新来の先生を愚弄《ぐろう》するような軽薄な生徒を寛仮《かんか》しては学校の威信《いしん》に関わる事と思います。教育の精神は単に学問を授けるばかりではない、高尚《こうしょう》な、正直な、武士的な元気を鼓吹《こすい》すると同時に、野卑《やひ》な、軽躁《けいそう》な、暴慢《ぼうまん》な悪風を掃蕩《そうとう》するにあると思います。もし反動が恐《おそろ》しいの、騒動が大きくなるのと姑息《こそく》な事を云った日
 にはこの弊風《へいふう》はいつ矯正《きょうせい》出来るか知れません。かかる弊風を杜絶《とぜつ》するためにこそ吾々はこの学校に職を奉じているので、これを見逃《みの》がすくらいなら始めから教師にならん方がいいと思います。私は以上の理由で寄宿生一同を厳罰《げんばつ》に処する上に、当該《とうがい》教師の面前において公けに謝罪の意を表せしむるのを至当の所置と心得ます」と云いながら、どんと腰《こし》を卸《おろ》した。一同はだまって何にも言わない。赤シャツはまたパイプを拭《ふ》き始めた。おれは何だか非常に嬉《うれ》しかった。おれの云おうと思うところをおれの代りに山嵐がすっかり言ってくれたようなものだ。おれはこう云う単純な人間だから、今までの喧嘩はまるで忘れて、大いに難有�
 ��ありがた》いと云う顔をもって、腰を卸した山嵐の方を見たら、山嵐は一向知らん面《かお》をしている。　しばらくして山嵐はまた起立した。「ただ今ちょっと失念して言い落《おと》しましたから、申します。当夜の宿直員は宿直中外出して温泉に行かれたようであるが、あれはもっての外の事と考えます。いやしくも自分が一校の留守番を引き受けながら、咎《とが》める者のないのを幸《さいわい》に、場所もあろうに温泉などへ入湯にいくなどと云うのは大きな失体である。生徒は生徒として、この点については校長からとくに責任者にご注意あらん事を希望します」　妙な奴だ、ほめたと思ったら、あとからすぐ人の失策をあばいている。おれは何の気もなく、前の宿直が出あるいた事を知って、そんな習慣だと思って、つ�
 �温泉まで行ってしまったんだが、なるほどそう云われてみると、これはおれが悪るかった。攻撃《こうげき》されても仕方がない。そこでおれはまた起って「私は正に宿直中に温泉に行きました。これは全くわるい。あやまります」と云って着席したら、一同がまた笑い出した。おれが何か云いさえすれば笑う。つまらん奴等《やつら》だ。貴様等これほど自分のわるい事を公けにわるかったと断言出来るか、出来ないから笑うんだろう。　それから校長は、もう大抵ご意見もないようでありますから、よく考えた上で処分しましょうと云った。ついでだからその結果を云うと、寄宿生は一週間の禁足になった上に、おれの前へ出て謝罪をした。謝罪をしなければその時辞職して帰るところだったがなまじい、おれのいう通りになったので
 とうとう大変な事になってしまった。それはあとから話すが、校長はこの時会議の引き続きだと号してこんな事を云った。生徒の風儀《ふうぎ》は、教師の感化で正していかなくてはならん、その一着手として、教師はなるべく飲食店などに出入《しゅつにゅう》しない事にしたい。もっとも送別会などの節は特別であるが、単独にあまり上等でない場所へ行くのはよしたい――たとえば蕎麦屋《そばや》だの、団子屋《だんごや》だの――と云いかけたらまた一同が笑った。野だが山嵐を見て天麩羅《てんぷら》と云って目くばせをしたが山嵐は取り合わなかった。いい気味《きび》だ。　おれは脳がわるいから、狸の云うことなんか、よく分らないが、蕎麦屋や団子屋へ行って、中学の教師が勤まらなくっちゃ、おれみたような食い心�
 ��《しんぼう》にゃ到底《とうてい》出来っ子ないと思った。それなら、それでいいから、初手から蕎麦と団子の嫌いなものと注文して雇《やと》うがいい。だんまりで辞令を下げておいて、蕎麦を食うな、団子を食うなと罪なお布令《ふれ》を出すのは、おれのような外に道楽のないものにとっては大変な打撃だ。すると赤シャツがまた口を出した。「元来中学の教師なぞは社会の上流にくらいするものだからして、単に物質的の快楽ばかり求めるべきものでない。その方に耽《ふけ》るとつい品性にわるい影響《えいきょう》を及ぼすようになる。しかし人間だから、何か娯楽《ごらく》がないと、田舎《いなか》へ来て狭《せま》い土地では到底｜暮《くら》せるものではない。それで釣《つり》に行くとか、文学書を読むとか、ま�
 �は新体詩や俳句を作るとか、何でも高尚《こうしょう》な精神的娯楽を求めなくってはいけない……」　だまって聞いてると勝手な熱を吹く。沖《おき》へ行って肥料《こやし》を釣ったり、ゴルキが露西亜《ロシア》の文学者だったり、馴染《なじみ》の芸者が松《まつ》の木の下に立ったり、古池へ蛙《かわず》が飛び込んだりするのが精神的娯楽なら、天麩羅を食って団子を呑《の》み込むのも精神的娯楽だ。そんな下さらない娯楽を授けるより赤シャツの洗濯《せんたく》でもするがいい。あんまり腹が立ったから「マドンナに逢《あ》うのも精神的娯楽ですか」と聞いてやった。すると今度は誰も笑わない。妙な顔をして互《たがい》に眼と眼を見合せている。赤シャツ自身は苦しそうに下を向いた。それ見ろ。利いたろう。た
 だ気の毒だったのはうらなり君で、おれが、こう云ったら蒼い顔をますます蒼くした。　　　　　七　おれは即夜《そくや》下宿を引き払《はら》った。宿へ帰って荷物をまとめていると、女房《にょうぼう》が何か不都合《ふつごう》でもございましたか、お腹の立つ事があるなら、云《い》っておくれたら改めますと云う。どうも驚《おど》ろく。世の中にはどうして、こんな要領を得ない者ばかり揃《そろ》ってるんだろう。出てもらいたいんだか、居てもらいたいんだか分《わか》りゃしない。まるで気狂《きちがい》だ。こんな者を相手に喧嘩《けんか》をしたって江戸《えど》っ子の名折れだから、車屋をつれて来てさっさと出てきた。　出た事は出たが、どこへ行くというあてもない。車屋が、どちらへ参りますと云うから�
 ��だまって尾《つ》いて来い、今にわかる、と云って、すたすたやって来た。面倒《めんどう》だから山城屋へ行こうかとも考えたが、また出なければならないから、つまり手数だ。こうして歩いてるうちには下宿とか、何とか看板のあるうちを目付け出すだろう。そうしたら、そこが天意に叶《かな》ったわが宿と云う事にしよう。とぐるぐる、閑静《かんせい》で住みよさそうな所をあるいているうち、とうとう鍛冶屋町《かじやちょう》へ出てしまった。ここは士族｜屋敷《やしき》で下宿屋などのある町ではないから、もっと賑《にぎ》やかな方へ引き返そうかとも思ったが、ふといい事を考え付いた。おれが敬愛するうらなり君はこの町内に住んでいる。うらなり君は土地の人で先祖代々の屋敷を控《ひか》えているくらいだか�
 �、この辺の事情には通じているに相違《そうい》ない。あの人を尋《たず》ねて聞いたら、よさそうな下宿を教えてくれるかも知れない。幸《さいわい》一度｜挨拶《あいさつ》に来て勝手は知ってるから、捜《さ》がしてあるく面倒はない。ここだろうと、いい加減に見当をつけて、ご免《めん》ご免と二返ばかり云うと、奥《おく》から五十ぐらいな年寄《としより》が古風な紙燭《しそく》をつけて、出て来た。おれは若い女も嫌《きら》いではないが、年寄を見ると何だかなつかしい心持ちがする。大方｜清《きよ》がすきだから、その魂《たましい》が方々のお婆《ばあ》さんに乗り移るんだろう。これは大方うらなり君のおっ母《か》さんだろう。切り下げの品格のある婦人だが、よくうらなり君に似ている。まあお上がりと
 云うところを、ちょっとお目にかかりたいからと、主人を玄関《げんかん》まで呼び出して実はこれこれだが君どこか心当りはありませんかと尋ねてみた。うらなり先生それはさぞお困りでございましょう、としばらく考えていたが、この裏町に萩野《はぎの》と云って老人夫婦ぎりで暮《く》らしているものがある、いつぞや座敷《ざしき》を明けておいても無駄《むだ》だから、たしかな人があるなら貸してもいいから周旋《しゅうせん》してくれと頼《たの》んだ事がある。今でも貸すかどうか分らんが、まあいっしょに行って聞いてみましょうと、親切に連れて行ってくれた。　その夜から萩野の家の下宿人となった。驚《おどろ》いたのは、おれがいか銀の座敷を引き払うと、翌日《あくるひ》から入れ違《ちが》いに野だが平�
 ��な顔をして、おれの居た部屋を占領《せんりょう》した事だ。さすがのおれもこれにはあきれた。世の中はいかさま師ばかりで、お互《たがい》に乗せっこをしているのかも知れない。いやになった。　世間がこんなものなら、おれも負けない気で、世間並《せけんなみ》にしなくちゃ、遣《や》りきれない訳になる。巾着切《きんちゃくきり》の上前をはねなければ三度のご膳《ぜん》が戴《いただ》けないと、事が極《き》まればこうして、生きてるのも考え物だ。と云ってぴんぴんした達者なからだで、首を縊《くく》っちゃ先祖へ済まない上に、外聞が悪い。考えると物理学校などへはいって、数学なんて役にも立たない芸を覚えるよりも、六百円を資本《もとで》にして牛乳屋でも始めればよかった。そうすれば清もおれの傍�
 �そば》を離《はな》れずに済むし、おれも遠くから婆さんの事を心配しずに暮《くら》される。いっしょに居るうちは、そうでもなかったが、こうして田舎《いなか》へ来てみると清はやっぱり善人だ。あんな気立《きだて》のいい女は日本中さがして歩いたってめったにはない。婆さん、おれの立つときに、少々｜風邪《かぜ》を引いていたが今頃《いまごろ》はどうしてるか知らん。先だっての手紙を見たらさぞ喜んだろう。それにしても、もう返事がきそうなものだが――おれはこんな事ばかり考えて二三日暮していた。　気になるから、宿のお婆さんに、東京から手紙は来ませんかと時々｜尋《たず》ねてみるが、聞くたんびに何にも参りませんと気の毒そうな顔をする。ここの夫婦はいか銀とは違って、もとが士族だけに双方《
 そうほう》共上品だ。爺《じい》さんが夜《よ》るになると、変な声を出して謡《うたい》をうたうには閉口するが、いか銀のようにお茶を入れましょうと無暗《むやみ》に出て来ないから大きに楽だ。お婆さんは時々部屋へ来ていろいろな話をする。どうして奥さんをお連れなさって、いっしょにお出《い》でなんだのぞなもしなどと質問をする。奥さんがあるように見えますかね。可哀想《かわいそう》にこれでもまだ二十四ですぜと云ったらそれでも、あなた二十四で奥さんがおありなさるのは当り前ぞなもしと冒頭《ぼうとう》を置いて、どこの誰《だれ》さんは二十でお嫁《よめ》をお貰《もら》いたの、どこの何とかさんは二十二で子供を二人《ふたり》お持ちたのと、何でも例を半ダースばかり挙げて反駁《はんばく》を試�
 ��たには恐《おそ》れ入った。それじゃ僕《ぼく》も二十四でお嫁をお貰いるけれ、世話をしておくれんかなと田舎言葉を真似《まね》て頼んでみたら、お婆さん正直に本当かなもしと聞いた。「本当の本当《ほんま》のって僕あ、嫁が貰いたくって仕方がないんだ」「そうじゃろうがな、もし。若いうちは誰もそんなものじゃけれ」この挨拶《あいさつ》には痛み入って返事が出来なかった。「しかし先生はもう、お嫁がおありなさるに極《きま》っとらい。私はちゃんと、もう、睨《ね》らんどるぞなもし」「へえ、活眼《かつがん》だね。どうして、睨らんどるんですか」「どうしててて。東京から便りはないか、便りはないかてて、毎日便りを待ち焦《こ》がれておいでるじゃないかなもし」「こいつあ驚《おどろ》いた。大変な�
 �眼だ」「中《あた》りましたろうがな、もし」「そうですね。中ったかも知れませんよ」「しかし今時の女子《おなご》は、昔《むかし》と違《ちご》うて油断が出来んけれ、お気をお付けたがええぞなもし」「何ですかい、僕の奥さんが東京で間男でもこしらえていますかい」「いいえ、あなたの奥さんはたしかじゃけれど……」「それで、やっと安心した。それじゃ何を気を付けるんですい」「あなたのはたしか――あなたのはたしかじゃが――」「どこに不たしかなのが居ますかね」「ここ等《ら》にも大分｜居《お》ります。先生、あの遠山のお嬢《じょう》さんをご存知かなもし」「いいえ、知りませんね」「まだご存知ないかなもし。ここらであなた一番の別嬪《べっぴん》さんじゃがなもし。あまり別嬪さんじゃけれ、学校
 の先生方はみんなマドンナマドンナと言うといでるぞなもし。まだお聞きんのかなもし」「うん、マドンナですか。僕あ芸者の名かと思った」「いいえ、あなた。マドンナと云うと唐人《とうじん》の言葉で、別嬪さんの事じゃろうがなもし」「そうかも知れないね。驚いた」「大方画学の先生がお付けた名ぞなもし」「野だがつけたんですかい」「いいえ、あの吉川《よしかわ》先生がお付けたのじゃがなもし」「そのマドンナが不たしかなんですかい」「そのマドンナさんが不たしかなマドンナさんでな、もし」「厄介《やっかい》だね。渾名《あだな》の付いてる女にゃ昔から碌《ろく》なものは居ませんからね。そうかも知れませんよ」「ほん当にそうじゃなもし。鬼神《きじん》のお松《まつ》じゃの、妲妃《だっき》のお百じ�
 ��のてて怖《こわ》い女が居《お》りましたなもし」「マドンナもその同類なんですかね」「そのマドンナさんがなもし、あなた。そらあの、あなたをここへ世話をしておくれた古賀先生なもし――あの方の所へお嫁《よめ》に行く約束《やくそく》が出来ていたのじゃがなもし――」「へえ、不思議なもんですね。あのうらなり君が、そんな艶福《えんぷく》のある男とは思わなかった。人は見懸《みか》けによらない者だな。ちっと気を付けよう」「ところが、去年あすこのお父さんが、お亡くなりて、――それまではお金もあるし、銀行の株も持ってお出《いで》るし、万事｜都合《つごう》がよかったのじゃが――それからというものは、どういうものか急に暮し向きが思わしくなくなって――つまり古賀さんがあまりお人が好過�
 �よす》ぎるけれ、お欺《だま》されたんぞなもし。それや、これやでお輿入《こしいれ》も延びているところへ、あの教頭さんがお出《い》でて、是非お嫁にほしいとお云いるのじゃがなもし」「あの赤シャツがですか。ひどい奴《やつ》だ。どうもあのシャツはただのシャツじゃないと思ってた。それから？」「人を頼んで懸合《かけお》うておみると、遠山さんでも古賀さんに義理があるから、すぐには返事は出来かねて――まあよう考えてみようぐらいの挨拶をおしたのじゃがなもし。すると赤シャツさんが、手蔓《てづる》を求めて遠山さんの方へ出入《でいり》をおしるようになって、とうとうあなた、お嬢さんを手馴付《てなづ》けておしまいたのじゃがなもし。赤シャツさんも赤シャツさんじゃが、お嬢さんもお嬢さんじゃ
 てて、みんなが悪《わ》るく云いますのよ。いったん古賀さんへ嫁に行くてて承知をしときながら、今さら学士さんがお出《いで》たけれ、その方に替《か》えよてて、それじゃ今日様《こんにちさま》へ済むまいがなもし、あなた」「全く済まないね。今日様どころか明日様にも明後日様にも、いつまで行ったって済みっこありませんね」「それで古賀さんにお気の毒じゃてて、お友達の堀田《ほった》さんが教頭の所へ意見をしにお行きたら、赤シャツさんが、あしは約束のあるものを横取りするつもりはない。破約になれば貰うかも知れんが、今のところは遠山家とただ交際をしているばかりじゃ、遠山家と交際をするには別段古賀さんに済まん事もなかろうとお云いるけれ、堀田さんも仕方がなしにお戻《もど》りたそうな。赤シ�
 ��ツさんと堀田さんは、それ以来｜折合《おりあい》がわるいという評判ぞなもし」「よくいろいろな事を知ってますね。どうして、そんな詳《くわ》しい事が分るんですか。感心しちまった」「狭《せま》いけれ何でも分りますぞなもし」　分り過ぎて困るくらいだ。この容子《ようす》じゃおれの天麩羅《てんぷら》や団子《だんご》の事も知ってるかも知れない。厄介《やっかい》な所だ。しかしお蔭様《かげさま》でマドンナの意味もわかるし、山嵐と赤シャツの関係もわかるし大いに後学になった。ただ困るのはどっちが悪る者だか判然しない。おれのような単純なものには白とか黒とか片づけてもらわないと、どっちへ味方をしていいか分らない。「赤シャツと山嵐たあ、どっちがいい人ですかね」「山嵐て何ぞなもし」「山�
 �というのは堀田の事ですよ」「そりゃ強い事は堀田さんの方が強そうじゃけれど、しかし赤シャツさんは学士さんじゃけれ、働きはある方《かた》ぞな、もし。それから優しい事も赤シャツさんの方が優しいが、生徒の評判は堀田さんの方がええというぞなもし」「つまりどっちがいいんですかね」「つまり月給の多い方が豪《えら》いのじゃろうがなもし」　これじゃ聞いたって仕方がないから、やめにした。それから二三日して学校から帰るとお婆さんがにこにこして、へえお待遠さま。やっと参りました。と一本の手紙を持って来てゆっくりご覧と云って出て行った。取り上げてみると清からの便りだ。符箋《ふせん》が二三｜枚《まい》ついてるから、よく調べると、山城屋から、いか銀の方へ廻《まわ》して、いか銀から、萩野
 《はぎの》へ廻って来たのである。その上山城屋では一週間ばかり逗留《とうりゅう》している。宿屋だけに手紙まで泊《とめ》るつもりなんだろう。開いてみると、非常に長いもんだ。坊《ぼ》っちゃんの手紙を頂いてから、すぐ返事をかこうと思ったが、あいにく風邪を引いて一週間ばかり寝《ね》ていたものだから、つい遅《おそ》くなって済まない。その上今時のお嬢さんのように読み書きが達者でないものだから、こんなまずい字でも、かくのによっぽど骨が折れる。甥《おい》に代筆を頼もうと思ったが、せっかくあげるのに自分でかかなくっちゃ、坊っちゃんに済まないと思って、わざわざ下《し》たがきを一返して、それから清書をした。清書をするには二日で済んだが、下た書きをするには四日かかった。読みにくいか�
 ��知れないが、これでも一生懸命《いっしょうけんめい》にかいたのだから、どうぞしまいまで読んでくれ。という冒頭《ぼうとう》で四尺ばかり何やらかやら認《したた》めてある。なるほど読みにくい。字がまずいばかりではない、大抵《たいてい》平仮名だから、どこで切れて、どこで始まるのだか句読《くとう》をつけるのによっぽど骨が折れる。おれは焦《せ》っ勝《か》ちな性分だから、こんな長くて、分りにくい手紙は、五円やるから読んでくれと頼まれても断わるのだが、この時ばかりは真面目《まじめ》になって、始《はじめ》から終《しまい》まで読み通した。読み通した事は事実だが、読む方に骨が折れて、意味がつながらないから、また頭から読み直してみた。部屋のなかは少し暗くなって、前の時より見にくく�
 �なったから、とうとう椽鼻《えんばな》へ出て腰《こし》をかけながら鄭寧《ていねい》に拝見した。すると初秋《はつあき》の風が芭蕉《ばしょう》の葉を動かして、素肌《すはだ》に吹《ふ》きつけた帰りに、読みかけた手紙を庭の方へなびかしたから、しまいぎわには四尺あまりの半切れがさらりさらりと鳴って、手を放すと、向《むこ》うの生垣まで飛んで行きそうだ。おれはそんな事には構っていられない。坊っちゃんは竹を割ったような気性だが、ただ肝癪《かんしゃく》が強過ぎてそれが心配になる。――ほかの人に無暗《むやみ》に渾名《あだな》なんか、つけるのは人に恨《うら》まれるもとになるから、やたらに使っちゃいけない、もしつけたら、清だけに手紙で知らせろ。――田舎者は人がわるいそうだから、気を
 つけてひどい目に遭《あ》わないようにしろ。――気候だって東京より不順に極ってるから、寝冷《ねびえ》をして風邪を引いてはいけない。坊っちゃんの手紙はあまり短過ぎて、容子がよくわからないから、この次にはせめてこの手紙の半分ぐらいの長さのを書いてくれ。――宿屋へ茶代を五円やるのはいいが、あとで困りゃしないか、田舎へ行って頼《たよ》りになるはお金ばかりだから、なるべく倹約《けんやく》して、万一の時に差支《さしつか》えないようにしなくっちゃいけない。――お小遣《こづかい》がなくて困るかも知れないから、為替《かわせ》で十円あげる。――先《せん》だって坊っちゃんからもらった五十円を、坊っちゃんが、東京へ帰って、うちを持つ時の足しにと思って、郵便局へ預けておいたが、この十�
 ��を引いてもまだ四十円あるから大丈夫だ。――なるほど女と云うものは細かいものだ。　おれが椽鼻で清の手紙をひらつかせながら、考え込《こ》んでいると、しきりの襖《ふすま》をあけて、萩野のお婆さんが晩めしを持ってきた。まだ見てお出《い》でるのかなもし。えっぽど長いお手紙じゃなもし、と云ったから、ええ大事な手紙だから風に吹かしては見、吹かしては見るんだと、自分でも要領を得ない返事をして膳《ぜん》についた。見ると今夜も薩摩芋《さつまいも》の煮《に》つけだ。ここのうちは、いか銀よりも鄭寧《ていねい》で、親切で、しかも上品だが、惜《お》しい事に食い物がまずい。昨日も芋、一昨日《おととい》も芋で今夜も芋だ。おれは芋は大好きだと明言したには相違ないが、こう立てつづけに芋を食�
 �されては命がつづかない。うらなり君を笑うどころか、おれ自身が遠からぬうちに、芋のうらなり先生になっちまう。清ならこんな時に、おれの好きな鮪《まぐろ》のさし身か、蒲鉾《かまぼこ》のつけ焼を食わせるんだが、貧乏《びんぼう》士族のけちん坊《ぼう》と来ちゃ仕方がない。どう考えても清といっしょでなくっちあ駄目《だめ》だ。もしあの学校に長くでも居る模様なら、東京から召《よ》び寄《よ》せてやろう。天麩羅｜蕎麦《そば》を食っちゃならない、団子を食っちゃならない、それで下宿に居て芋ばかり食って黄色くなっていろなんて、教育者はつらいものだ。禅宗《ぜんしゅう》坊主だって、これよりは口に栄耀《えよう》をさせているだろう。――おれは一皿の芋を平げて、机の抽斗《ひきだし》から生卵を二
 つ出して、茶碗《ちゃわん》の縁《ふち》でたたき割って、ようやく凌《しの》いだ。生卵ででも営養をとらなくっちあ一週二十一時間の授業が出来るものか。　今日は清の手紙で湯に行く時間が遅くなった。しかし毎日行きつけたのを一日でも欠かすのは心持ちがわるい。汽車にでも乗って出懸《でか》けようと、例の赤手拭《あかてぬぐい》をぶら下げて停車場《ていしゃば》まで来ると二三分前に発車したばかりで、少々待たなければならぬ。ベンチへ腰を懸けて、敷島《しきしま》を吹かしていると、偶然《ぐうぜん》にもうらなり君がやって来た。おれはさっきの話を聞いてから、うらなり君がなおさら気の毒になった。平常《ふだん》から天地の間に居候《いそうろう》をしているように、小さく構えているのがいかにも憐《�
 ��わ》れに見えたが、今夜は憐れどころの騒《さわ》ぎではない。出来るならば月給を倍にして、遠山のお嬢さんと明日《あした》から結婚《けっこん》さして、一ヶ月ばかり東京へでも遊びにやってやりたい気がした矢先だから、やお湯ですか、さあ、こっちへお懸けなさいと威勢《いせい》よく席を譲《ゆず》ると、うらなり君は恐《おそ》れ入った体裁で、いえ構《かも》うておくれなさるな、と遠慮《えんりょ》だか何だかやっぱり立ってる。少し待たなくっちゃ出ません、草臥《くたび》れますからお懸けなさいとまた勧めてみた。実はどうかして、そばへ懸けてもらいたかったくらいに気の毒でたまらない。それではお邪魔《じゃま》を致《いた》しましょうとようやくおれの云う事を聞いてくれた。世の中には野だみたよう�
 �生意気な、出ないで済む所へ必ず顔を出す奴もいる。山嵐のようにおれが居なくっちゃ日本《にっぽん》が困るだろうと云うような面を肩《かた》の上へ載《の》せてる奴もいる。そうかと思うと、赤シャツのようにコスメチックと色男の問屋をもって自ら任じているのもある。教育が生きてフロックコートを着ればおれになるんだと云わぬばかりの狸《たぬき》もいる。皆々《みなみな》それ相応に威張ってるんだが、このうらなり先生のように在れどもなきがごとく、人質に取られた人形のように大人《おとな》しくしているのは見た事がない。顔はふくれているが、こんな結構な男を捨てて赤シャツに靡《なび》くなんて、マドンナもよっぼど気の知れないおきゃんだ。赤シャツが何ダース寄ったって、これほど立派な旦那様《だん
 なさま》が出来るもんか。「あなたはどっか悪いんじゃありませんか。大分たいぎそうに見えますが……」「いえ、別段これという持病もないですが……」「そりゃ結構です。からだが悪いと人間も駄目ですね」「あなたは大分ご丈夫《じょうぶ》のようですな」「ええ瘠《や》せても病気はしません。病気なんてものあ大嫌いですから」　うらなり君は、おれの言葉を聞いてにやにやと笑った。　ところへ入口で若々しい女の笑声が聞《きこ》えたから、何心なく振《ふ》り返ってみるとえらい奴が来た。色の白い、ハイカラ頭の、背の高い美人と、四十五六の奥さんとが並《なら》んで切符《きっぷ》を売る窓の前に立っている。おれは美人の形容などが出来る男でないから何にも云えないが全く美人に相違ない。何だか水晶《すいし�
 ��う》の珠《たま》を香水《こうすい》で暖《あっ》ためて、掌《てのひら》へ握《にぎ》ってみたような心持ちがした。年寄の方が背は低い。しかし顔はよく似ているから親子だろう。おれは、や、来たなと思う途端《とたん》に、うらなり君の事は全然《すっかり》忘れて、若い女の方ばかり見ていた。すると、うらなり君が突然《とつぜん》おれの隣《となり》から、立ち上がって、そろそろ女の方へ歩き出したんで、少し驚いた。マドンナじゃないかと思った。三人は切符所の前で軽く挨拶している。遠いから何を云ってるのか分らない。　停車場の時計を見るともう五分で発車だ。早く汽車がくればいいがなと、話し相手が居なくなったので待ち遠しく思っていると、また一人あわてて場内へ馳《か》け込《こ》んで来たものが�
 �る。見れば赤シャツだ。何だかべらべら然たる着物へ縮緬《ちりめん》の帯をだらしなく巻き付けて、例の通り金鎖《きんぐさ》りをぶらつかしている。あの金鎖りは贋物《にせもの》である。赤シャツは誰《だれ》も知るまいと思って、見せびらかしているが、おれはちゃんと知ってる。赤シャツは馳け込んだなり、何かきょろきょろしていたが、切符｜売下所《うりさげじょ》の前に話している三人へ慇懃《いんぎん》にお辞儀《じぎ》をして、何か二こと、三こと、云ったと思ったら、急にこっちへ向いて、例のごとく猫足《ねこあし》にあるいて来て、や君も湯ですか、僕は乗り後れやしないかと思って心配して急いで来たら、まだ三四分ある。あの時計はたしかかしらんと、自分の金側《きんがわ》を出して、二分ほどちがって
 ると云いながら、おれの傍《そば》へ腰を卸《おろ》した。女の方はちっとも見返らないで杖《つえ》の上に顋《あご》をのせて、正面ばかり眺《なが》めている。年寄の婦人は時々赤シャツを見るが、若い方は横を向いたままである。いよいよマドンナに違いない。　やがて、ピューと汽笛《きてき》が鳴って、車がつく。待ち合せた連中はぞろぞろ吾《わ》れ勝《がち》に乗り込む。赤シャツはいの一号に上等へ飛び込んだ。上等へ乗ったって威張れるどころではない、住田《すみた》まで上等が五銭で下等が三銭だから、わずか二銭違いで上下の区別がつく。こういうおれでさえ上等を奮発《ふんぱつ》して白切符を握《にぎ》ってるんでもわかる。もっとも田舎者はけちだから、たった二銭の出入でもすこぶる苦になると見えて、�
 ��抵《たいてい》は下等へ乗る。赤シャツのあとからマドンナとマドンナのお袋が上等へはいり込んだ。うらなり君は活版で押《お》したように下等ばかりへ乗る男だ。先生、下等の車室の入口へ立って、何だか躊躇《ちゅうちょ》の体《てい》であったが、おれの顔を見るや否や思いきって、飛び込んでしまった。おれはこの時何となく気の毒でたまらなかったから、うらなり君のあとから、すぐ同じ車室へ乗り込んだ。上等の切符で下等へ乗るに不都合はなかろう。　温泉へ着いて、三階から、浴衣《ゆかた》のなりで湯壺《ゆつぼ》へ下りてみたら、またうらなり君に逢った。おれは会議や何かでいざと極まると、咽喉《のど》が塞《ふさ》がって饒舌《しゃべ》れない男だが、平常《ふだん》は随分《ずいぶん》弁ずる方だから、�
 �ろいろ湯壺のなかでうらなり君に話しかけてみた。何だか憐れぽくってたまらない。こんな時に一口でも先方の心を慰《なぐさ》めてやるのは、江戸《えど》っ子の義務だと思ってる。ところがあいにくうらなり君の方では、うまい具合にこっちの調子に乗ってくれない。何を云っても、え［＃「え」に傍点］とかいえ［＃「いえ」に傍点］とかぎりで、しかもそのえ［＃「え」に傍点］といえ［＃「いえ」に傍点］が大分｜面倒《めんどう》らしいので、しまいにはとうとう切り上げて、こっちからご免蒙《めんこうむ》った。　湯の中では赤シャツに逢わなかった。もっとも風呂《ふろ》の数はたくさんあるのだから、同じ汽車で着いても、同じ湯壺で逢うとは極まっていない。別段不思議にも思わなかった。風呂を出てみるといい月
 だ。町内の両側に柳《やなぎ》が植《うわ》って、柳の枝《えだ》が丸《ま》るい影を往来の中へ落《おと》している。少し散歩でもしよう。北へ登って町のはずれへ出ると、左に大きな門があって、門の突き当りがお寺で、左右が妓楼《ぎろう》である。山門のなかに遊廓《ゆうかく》があるなんて、前代未聞の現象だ。ちょっとはいってみたいが、また狸から会議の時にやられるかも知れないから、やめて素通りにした。門の並びに黒い暖簾《のれん》をかけた、小さな格子窓《こうしまど》の平屋はおれが団子を食って、しくじった所だ。丸提灯《まるぢょうちん》に汁粉《しるこ》、お雑煮《ぞうに》とかいたのがぶらさがって、提灯の火が、軒端《のきば》に近い一本の柳の幹を照らしている。食いたいなと思ったが我慢して通�
 ��過ぎた。　食いたい団子の食えないのは情ない。しかし自分の許嫁《いいなずけ》が他人に心を移したのは、なお情ないだろう。うらなり君の事を思うと、団子は愚《おろ》か、三日ぐらい断食《だんじき》しても不平はこぼせない訳だ。本当に人間ほどあてにならないものはない。あの顔を見ると、どうしたって、そんな不人情な事をしそうには思えないんだが――うつくしい人が不人情で、冬瓜《とうがん》の水膨《みずぶく》れのような古賀さんが善良な君子なのだから、油断が出来ない。淡泊《たんぱく》だと思った山嵐は生徒を煽動《せんどう》したと云うし。生徒を煽動したのかと思うと、生徒の処分を校長に逼《せま》るし。厭味《いやみ》で練りかためたような赤シャツが存外親切で、おれに余所《よそ》ながら注意を�
 �てくれるかと思うと、マドンナを胡魔化《ごまか》したり、胡魔化したのかと思うと、古賀の方が破談にならなければ結婚は望まないんだと云うし。いか銀が難癖《なんくせ》をつけて、おれを追い出すかと思うと、すぐ野だ公が入《い》れ替《かわ》ったり――どう考えてもあてにならない。こんな事を清にかいてやったら定めて驚く事だろう。箱根《はこね》の向うだから化物《ばけもの》が寄り合ってるんだと云うかも知れない。　おれは、性来《しょうらい》構わない性分だから、どんな事でも苦にしないで今日まで凌いで来たのだが、ここへ来てからまだ一ヶ月立つか、立たないうちに、急に世のなかを物騒《ぶっそう》に思い出した。別段際だった大事件にも出逢わないのに、もう五つ六つ年を取ったような気がする。早く切
 り上げて東京へ帰るのが一番よかろう。などとそれからそれへ考えて、いつか石橋を渡《わた》って野芹川《のぜりがわ》の堤《どて》へ出た。川と云うとえらそうだが実は一間ぐらいな、ちょろちょろした流れで、土手に沿うて十二丁ほど下ると相生村《あいおいむら》へ出る。村には観音様《かんのんさま》がある。　温泉《ゆ》の町を振り返ると、赤い灯が、月の光の中にかがやいている。太鼓《たいこ》が鳴るのは遊廓に相違ない。川の流れは浅いけれども早いから、神経質の水のようにやたらに光る。ぶらぶら土手の上をあるきながら、約三丁も来たと思ったら、向うに人影《ひとかげ》が見え出した。月に透《す》かしてみると影は二つある。温泉《ゆ》へ来て村へ帰る若い衆《しゅ》かも知れない。それにしては唄《うた》も
 うたわない。存外静かだ。　だんだん歩いて行くと、おれの方が早足だと見えて、二つの影法師が、次第に大きくなる。一人は女らしい。おれの足音を聞きつけて、十間ぐらいの距離《きょり》に逼った時、男がたちまち振り向いた。月は後《うしろ》からさしている。その時おれは男の様子を見て、はてなと思った。男と女はまた元の通りにあるき出した。おれは考えがあるから、急に全速力で追っ懸《か》けた。先方は何の気もつかずに最初の通り、ゆるゆる歩を移している。今は話し声も手に取るように聞える。土手の幅は六尺ぐらいだから、並んで行けば三人がようやくだ。おれは苦もなく後ろから追い付いて、男の袖《そで》を擦《す》り抜《ぬ》けざま、二足前へ出した踵《くびす》をぐるりと返して男の顔を覗《のぞ》き込
 《こ》んだ。月は正面からおれの五分｜刈《がり》の頭から顋の辺《あた》りまで、会釈《えしゃく》もなく照《てら》す。男はあっと小声に云ったが、急に横を向いて、もう帰ろうと女を促《うな》がすが早いか、温泉《ゆ》の町の方へ引き返した。　赤シャツは図太くて胡魔化すつもりか、気が弱くて名乗り損《そく》なったのかしら。ところが狭くて困ってるのは、おればかりではなかった。　　　　　八　赤シャツに勧められて釣《つり》に行った帰りから、山嵐《やまあらし》を疑ぐり出した。無い事を種に下宿を出ろと云われた時は、いよいよ不埒《ふらち》な奴《やつ》だと思った。ところが会議の席では案に相違《そうい》して滔々《とうとう》と生徒｜厳罰論《げんばつろん》を述べたから、おや変だなと首を捩《ひね
 》った。萩野《はぎの》の婆《ばあ》さんから、山嵐が、うらなり君のために赤シャツと談判をしたと聞いた時は、それは感心だと手を拍《う》った。この様子ではわる者は山嵐じゃあるまい、赤シャツの方が曲ってるんで、好加減《いいかげん》な邪推《じゃすい》を実《まこと》しやかに、しかも遠廻《とおまわ》しに、おれの頭の中へ浸《し》み込《こ》ましたのではあるまいかと迷ってる矢先へ、野芹川《のぜりがわ》の土手で、マドンナを連れて散歩なんかしている姿を見たから、それ以来赤シャツは曲者《くせもの》だと極《き》めてしまった。曲者だか何だかよくは分《わか》らないが、ともかくも善《い》い男じゃない。表と裏とは違《ちが》った男だ。人間は竹のように真直《まっすぐ》でなくっちゃ頼《たの》もしくな
 い。真直なものは喧嘩《けんか》をしても心持ちがいい。赤シャツのようなやさしいのと、親切なのと、高尚《こうしょう》なのと、琥珀《こはく》のパイプとを自慢《じまん》そうに見せびらかすのは油断が出来ない、めったに喧嘩も出来ないと思った。喧嘩をしても、回向院《えこういん》の相撲《すもう》のような心持ちのいい喧嘩は出来ないと思った。そうなると一銭五厘の出入《でいり》で控所《ひかえじょ》全体を驚《おど》ろかした議論の相手の山嵐の方がはるかに人間らしい。会議の時に金壺眼《かなつぼまなこ》をぐりつかせて、おれを睨《にら》めた時は憎《にく》い奴だと思ったが、あとで考えると、それも赤シャツのねちねちした猫撫声《ねこなでごえ》よりはましだ。実はあの会議が済んだあとで、よっぽど仲
 直りをしようかと思って、一こと二こと話しかけてみたが、野郎《やろう》返事もしないで、まだ眼《め》を剥《むく》ってみせたから、こっちも腹が立ってそのままにしておいた。　それ以来山嵐はおれと口を利かない。机の上へ返した一銭五厘はいまだに机の上に乗っている。ほこりだらけになって乗っている。おれは無論手が出せない、山嵐は決して持って帰らない。この一銭五厘が二人の間の墻壁《しょうへき》になって、おれは話そうと思っても話せない、山嵐は頑《がん》として黙《だま》ってる。おれと山嵐には一銭五厘が祟《たた》った。しまいには学校へ出て一銭五厘を見るのが苦になった。　山嵐とおれが絶交の姿となったに引き易《か》えて、赤シャツとおれは依然《いぜん》として在来の関係を保って、交際をつ
 づけている。野芹川で逢《あ》った翌日などは、学校へ出ると第一番におれの傍《そば》へ来て、君今度の下宿はいいですかのまたいっしょに露西亜《ロシア》文学を釣《つ》りに行こうじゃないかのといろいろな事を話しかけた。おれは少々｜憎《にく》らしかったから、昨夜《ゆうべ》は二返逢いましたねと云《い》ったら、ええ停車場《ていしゃば》で――君はいつでもあの時分｜出掛《でか》けるのですか、遅いじゃないかと云う。野芹川の土手でもお目に懸《かか》りましたねと喰《く》らわしてやったら、いいえ僕《ぼく》はあっちへは行かない、湯にはいって、すぐ帰ったと答えた。何もそんなに隠《かく》さないでもよかろう、現に逢ってるんだ。よく嘘《うそ》をつく男だ。これで中学の教頭が勤まるなら、おれなんか�
 ��学総長がつとまる。おれはこの時からいよいよ赤シャツを信用しなくなった。信用しない赤シャツとは口をきいて、感心している山嵐とは話をしない。世の中は随分妙《ずいぶんみょう》なものだ。　ある日の事赤シャツがちょっと君に話があるから、僕のうちまで来てくれと云うから、惜《お》しいと思ったが温泉行きを欠勤して四時｜頃《ごろ》出掛けて行った。赤シャツは一人ものだが、教頭だけに下宿はとくの昔《むかし》に引き払《はら》って立派な玄関《げんかん》を構えている。家賃は九円五｜拾銭《じっせん》だそうだ。田舎《いなか》へ来て九円五拾銭払えばこんな家へはいれるなら、おれも一つ奮発《ふんぱつ》して、東京から清を呼び寄せて喜ばしてやろうと思ったくらいな玄関だ。頼むと云ったら、赤シャツの�
 �が取次《とりつぎ》に出て来た。この弟は学校で、おれに代数と算術を教わる至って出来のわるい子だ。その癖渡《くせわた》りものだから、生れ付いての田舎者よりも人が悪《わ》るい。　赤シャツに逢って用事を聞いてみると、大将例の琥珀のパイプで、きな臭《くさ》い烟草《たばこ》をふかしながら、こんな事を云った。「君が来てくれてから、前任者の時代よりも成績《せいせき》がよくあがって、校長も大いにいい人を得たと喜んでいるので――どうか学校でも信頼《しんらい》しているのだから、そのつもりで勉強していただきたい」「へえ、そうですか、勉強って今より勉強は出来ませんが――」「今のくらいで充分《じゅうぶん》です。ただ先だってお話しした事ですね、あれを忘れずにいて下さればいいのです」「下
 宿の世話なんかするものあ剣呑《けんのん》だという事ですか」「そう露骨《ろこつ》に云うと、意味もない事になるが――まあ善いさ――精神は君にもよく通じている事と思うから。そこで君が今のように出精《しゅっせい》して下されば、学校の方でも、ちゃんと見ているんだから、もう少しして都合《つごう》さえつけば、待遇《たいぐう》の事も多少はどうにかなるだろうと思うんですがね」「へえ、俸給《ほうきゅう》ですか。俸給なんかどうでもいいんですが、上がれば上がった方がいいですね」「それで幸い今度転任者が一人出来るから――もっとも校長に相談してみないと無論受け合えない事だが――その俸給から少しは融通《ゆうずう》が出来るかも知れないから、それで都合をつけるように校長に話してみようと思う�
 ��ですがね」「どうも難有《ありがと》う。だれが転任するんですか」「もう発表になるから話しても差し支《つか》えないでしょう。実は古賀君です」「古賀さんは、だってここの人じゃありませんか」「ここの地《じ》の人ですが、少し都合があって――半分は当人の希望です」「どこへ行《ゆ》くんです」「日向《ひゅうが》の延岡《のべおか》で――土地が土地だから一級俸｜上《あが》って行く事になりました」「誰《だれ》か代りが来るんですか」「代りも大抵《たいてい》極まってるんです。その代りの具合で君の待遇上の都合もつくんです」「はあ、結構です。しかし無理に上がらないでも構いません」「とも角も僕は校長に話すつもりです。それで校長も同意見らしいが、追っては君にもっと働いて頂《いた》だかなく�
 �てはならんようになるかも知れないから、どうか今からそのつもりで覚悟《かくご》をしてやってもらいたいですね」「今より時間でも増すんですか」「いいえ、時間は今より減るかも知れませんが――」「時間が減って、もっと働くんですか、妙だな」「ちょっと聞くと妙だが、――判然とは今言いにくいが――まあつまり、君にもっと重大な責任を持ってもらうかも知れないという意味なんです」　おれには一向分らない。今より重大な責任と云えば、数学の主任だろうが、主任は山嵐だから、やっこさんなかなか辞職する気遣《きづか》いはない。それに、生徒の人望があるから転任や免職《めんしょく》は学校の得策であるまい。赤シャツの談話はいつでも要領を得ない。要領を得なくっても用事はこれで済んだ。それから少し雑
 談をしているうちに、うらなり君の送別会をやる事や、ついてはおれが酒を飲むかと云う問や、うらなり先生は君子で愛すべき人だと云う事や――赤シャツはいろいろ弁じた。しまいに話をかえて君俳句をやりますかと来たから、こいつは大変だと思って、俳句はやりません、さようならと、そこそこに帰って来た。発句《ほっく》は芭蕉《ばしょう》か髪結床《かみいどこ》の親方のやるもんだ。数学の先生が朝顔やに釣瓶《つるべ》をとられてたまるものか。　帰ってうんと考え込んだ。世間には随分気の知れない男が居る。家屋敷はもちろん、勤める学校に不足のない故郷がいやになったからと云って、知らぬ他国へ苦労を求めに出る。それも花の都の電車が通《かよ》ってる所なら、まだしもだが、日向の延岡とは何の事だ。おれ�
 ��船つきのいいここへ来てさえ、一ヶ月立たないうちにもう帰りたくなった。延岡と云えば山の中も山の中も大変な山の中だ。赤シャツの云うところによると船から上がって、一日《いちんち》馬車へ乗って、宮崎へ行って、宮崎からまた一日《いちんち》車へ乗らなくっては着けないそうだ。名前を聞いてさえ、開けた所とは思えない。猿《さる》と人とが半々に住んでるような気がする。いかに聖人のうらなり君だって、好んで猿の相手になりたくもないだろうに、何という物数奇《ものずき》だ。　ところへあいかわらず婆《ばあ》さんが夕食《ゆうめし》を運んで出る。今日もまた芋《いも》ですかいと聞いてみたら、いえ今日はお豆腐《とうふ》ぞなもしと云った。どっちにしたって似たものだ。「お婆さん古賀さんは日向へ行�
 �そうですね」「ほん当にお気の毒じゃな、もし」「お気の毒だって、好んで行くんなら仕方がないですね」「好んで行くて、誰がぞなもし」「誰がぞなもしって、当人がさ。古賀先生が物数奇に行くんじゃありませんか」「そりゃあなた、大違いの勘五郎《かんごろう》ぞなもし」「勘五郎かね。だって今赤シャツがそう云いましたぜ。それが勘五郎なら赤シャツは嘘つきの法螺右衛門《ほらえもん》だ」「教頭さんが、そうお云いるのはもっともじゃが、古賀さんのお往《い》きともないのももっともぞなもし」「そんなら両方もっともなんですね。お婆さんは公平でいい。一体どういう訳なんですい」「今朝古賀のお母さんが見えて、だんだん訳をお話したがなもし」「どんな訳をお話したんです」「あそこもお父さんがお亡くなりて
 から、あたし達が思うほど暮《くら》し向《むき》が豊かになうてお困りじゃけれ、お母さんが校長さんにお頼みて、もう四年も勤めているものじゃけれ、どうぞ毎月頂くものを、今少しふやしておくれんかてて、あなた」「なるほど」「校長さんが、ようまあ考えてみとこうとお云いたげな。それでお母さんも安心して、今に増給のご沙汰《さた》があろぞ、今月か来月かと首を長くして待っておいでたところへ、校長さんがちょっと来てくれと古賀さんにお云いるけれ、行ってみると、気の毒だが学校は金が足りんけれ、月給を上げる訳にゆかん。しかし延岡になら空いた口があって、そっちなら毎月五円余分にとれるから、お望み通りでよかろうと思うて、その手続きにしたから行くがええと云われたげな。――」「じゃ相談じゃな�
 ��、命令じゃありませんか」「さよよ。古賀さんはよそへ行って月給が増すより、元のままでもええから、ここに居《お》りたい。屋敷もあるし、母もあるからとお頼みたけれども、もうそう極めたあとで、古賀さんの代りは出来ているけれ仕方がないと校長がお云いたげな」「へん人を馬鹿《ばか》にしてら、面白《おもしろ》くもない。じゃ古賀さんは行く気はないんですね。どうれで変だと思った。五円ぐらい上がったって、あんな山の中へ猿のお相手をしに行く唐変木《とうへんぼく》はまずないからね」「唐変木て、先生なんぞなもし」「何でもいいでさあ、――全く赤シャツの作略《さりゃく》だね。よくない仕打《しうち》だ。まるで欺撃《だましうち》ですね。それでおれの月給を上げるなんて、不都合《ふつごう》な事�
 �あるものか。上げてやるったって、誰が上がってやるものか」「先生は月給がお上りるのかなもし」「上げてやるって云うから、断《こと》わろうと思うんです」「何で、お断わりるのぞなもし」「何でもお断わりだ。お婆さん、あの赤シャツは馬鹿ですぜ。卑怯《ひきょう》でさあ」「卑怯でもあんた、月給を上げておくれたら、大人《おとな》しく頂いておく方が得ぞなもし。若いうちはよく腹の立つものじゃが、年をとってから考えると、も少しの我慢《がまん》じゃあったのに惜しい事をした。腹立てたためにこないな損をしたと悔《くや》むのが当り前じゃけれ、お婆の言う事をきいて、赤シャツさんが月給をあげてやろとお言いたら、難有《ありがと》うと受けておおきなさいや」「年寄《としより》の癖に余計な世話を焼か
 なくってもいい。おれの月給は上がろうと下がろうとおれの月給だ」　婆さんはだまって引き込んだ。爺《じい》さんは呑気《のんき》な声を出して謡《うたい》をうたってる。謡というものは読んでわかる所を、やにむずかしい節をつけて、わざと分らなくする術だろう。あんな者を毎晩｜飽《あ》きずに唸《うな》る爺さんの気が知れない。おれは謡どころの騒《さわ》ぎじゃない。月給を上げてやろうと云うから、別段欲しくもなかったが、入らない金を余しておくのももったいないと思って、よろしいと承知したのだが、転任したくないものを無理に転任させてその男の月給の上前を跳《は》ねるなんて不人情な事が出来るものか。当人がもとの通りでいいと云うのに延岡｜下《くんだ》りまで落ちさせるとは一体どう云う了見《�
 ��ょうけん》だろう。太宰権帥《だざいごんのそつ》でさえ博多《はかた》近辺で落ちついたものだ。河合又五郎《かあいまたごろう》だって相良《さがら》でとまってるじゃないか。とにかく赤シャツの所へ行って断わって来なくっちあ気が済まない。　小倉《こくら》の袴《はかま》をつけてまた出掛けた。大きな玄関へ突《つ》っ立って頼むと云うと、また例の弟が取次に出て来た。おれの顔を見てまた来たかという眼付《めつき》をした。用があれば二度だって三度だって来る。よる夜なかだって叩《たた》き起《おこ》さないとは限らない。教頭の所へご機嫌伺《きげんうかが》いにくるようなおれと見損《みそくな》ってるか。これでも月給が入らないから返しに来《きた》んだ。すると弟が今来客中だと云うから、玄関でい�
 �からちょっとお目にかかりたいと云ったら奥《おく》へ引き込んだ。足元を見ると、畳付《たたみつ》きの薄っぺらな、のめりの駒下駄《こまげた》がある。奥でもう万歳《ばんざい》ですよと云う声が聞《きこ》える。お客とは野だだなと気がついた。野だでなくては、あんな黄色い声を出して、こんな芸人じみた下駄を穿《は》くものはない。　しばらくすると、赤シャツがランプを持って玄関まで出て来て、まあ上がりたまえ、外の人じゃない吉川君だ、と云うから、いえここでたくさんです。ちょっと話せばいいんです、と云って、赤シャツの顔を見ると金時のようだ。野だ公と一杯《いっぱい》飲んでると見える。「さっき僕の月給を上げてやるというお話でしたが、少し考えが変ったから断わりに来たんです」　赤シャツはラ
 ンプを前へ出して、奥の方からおれの顔を眺《なが》めたが、とっさの場合返事をしかねて茫然《ぼうぜん》としている。増給を断わる奴が世の中にたった一人飛び出して来たのを不審《ふしん》に思ったのか、断わるにしても、今帰ったばかりで、すぐ出直してこなくってもよさそうなものだと、呆《あき》れ返ったのか、または双方合併《そうほうがっぺい》したのか、妙な口をして突っ立ったままである。「あの時承知したのは、古賀君が自分の希望で転任するという話でしたからで……」「古賀君は全く自分の希望で半ば転任するんです」「そうじゃないんです、ここに居たいんです。元の月給でもいいから、郷里に居たいのです」「君は古賀君から、そう聞いたのですか」「そりゃ当人から、聞いたんじゃありません」「じゃ誰�
 ��らお聞きです」「僕の下宿の婆さんが、古賀さんのおっ母《か》さんから聞いたのを今日僕に話したのです」「じゃ、下宿の婆さんがそう云ったのですね」「まあそうです」「それは失礼ながら少し違うでしょう。あなたのおっしゃる通りだと、下宿屋の婆さんの云う事は信ずるが、教頭の云う事は信じないと云うように聞えるが、そういう意味に解釈して差支《さしつか》えないでしょうか」　おれはちょっと困った。文学士なんてものはやっぱりえらいものだ。妙な所へこだわって、ねちねち押《お》し寄せてくる。おれはよく親父《おやじ》から貴様はそそっかしくて駄目《だめ》だ駄目だと云われたが、なるほど少々そそっかしいようだ。婆さんの話を聞いてはっと思って飛び出して来たが、実はうらなり君にもうらなりのおっ�
 �さんにも逢って詳《くわ》しい事情は聞いてみなかったのだ。だからこう文学士流に斬《き》り付けられると、ちょっと受け留めにくい。　正面からは受け留めにくいが、おれはもう赤シャツに対して不信任を心の中《うち》で申し渡してしまった。下宿の婆さんもけちん坊《ぼう》の欲張り屋に相違ないが、嘘は吐《つ》かない女だ、赤シャツのように裏表はない。おれは仕方がないから、こう答えた。「あなたの云う事は本当かも知れないですが――とにかく増給はご免蒙《めんこうむ》ります」「それはますます可笑《おか》しい。今君がわざわざお出《いで》になったのは増俸を受けるには忍《しの》びない、理由を見出したからのように聞えたが、その理由が僕の説明で取り去られたにもかかわらず増俸を否まれるのは少し解し
 かねるようですね」「解しかねるかも知れませんがね。とにかく断わりますよ」「そんなに否《いや》なら強いてとまでは云いませんが、そう二三時間のうちに、特別の理由もないのに豹変《ひょうへん》しちゃ、将来君の信用にかかわる」「かかわっても構わないです」「そんな事はないはずです、人間に信用ほど大切なものはありませんよ。よしんば今一歩｜譲《ゆず》って、下宿の主人が……」「主人じゃない、婆さんです」「どちらでもよろしい。下宿の婆さんが君に話した事を事実としたところで、君の増給は古賀君の所得を削《けず》って得たものではないでしょう。古賀君は延岡へ行かれる。その代りがくる。その代りが古賀君よりも多少低給で来てくれる。その剰余《じょうよ》を君に廻《ま》わすと云うのだから、君は�
 ��にも気の毒がる必要はないはずです。古賀君は延岡でただ今よりも栄進される。新任者は最初からの約束《やくそく》で安くくる。それで君が上がられれば、これほど都合《つごう》のいい事はないと思うですがね。いやなら否《いや》でもいいが、もう一返うちでよく考えてみませんか」　おれの頭はあまりえらくないのだから、いつもなら、相手がこういう巧妙《こうみょう》な弁舌を揮《ふる》えば、おやそうかな、それじゃ、おれが間違ってたと恐《おそ》れ入って引きさがるのだけれども、今夜はそうは行かない。ここへ来た最初から赤シャツは何だか虫が好かなかった。途中《とちゅう》で親切な女みたような男だと思い返した事はあるが、それが親切でも何でもなさそうなので、反動の結果今じゃよっぽど厭《いや》にな�
 �ている。だから先がどれほどうまく論理的に弁論を逞《たくまし》くしようとも、堂々たる教頭流におれを遣り込めようとも、そんな事は構わない。議論のいい人が善人とはきまらない。遣り込められる方が悪人とは限らない。表向きは赤シャツの方が重々もっともだが、表向きがいくら立派だって、腹の中まで惚《ほ》れさせる訳には行かない。金や威力《いりょく》や理屈《りくつ》で人間の心が買える者なら、高利貸でも巡査《じゅんさ》でも大学教授でも一番人に好かれなくてはならない。中学の教頭ぐらいな論法でおれの心がどう動くものか。人間は好き嫌いで働くものだ。論法で働くものじゃない。「あなたの云う事はもっともですが、僕は増給がいやになったんですから、まあ断わります。考えたって同じ事です。さような
 ら」と云いすてて門を出た。頭の上には天の川が一筋かかっている。　　　　　九　うらなり君の送別会のあるという日の朝、学校へ出たら、山嵐《やまあらし》が突然《とつぜん》、君先だってはいか銀が来て、君が乱暴して困るから、どうか出るように話してくれと頼《たの》んだから、真面目《まじめ》に受けて、君に出てやれと話したのだが、あとから聞いてみると、あいつは悪《わ》るい奴《やつ》で、よく偽筆《ぎひつ》へ贋落款《にせらっかん》などを押《お》して売りつけるそうだから、全く君の事も出鱈目《でたらめ》に違《ちが》いない。君に懸物《かけもの》や骨董《こっとう》を売りつけて、商売にしようと思ってたところが、君が取り合わないで儲《もう》けがないものだから、あんな作りごとをこしらえて胡�
 ��化《ごまか》したのだ。僕はあの人物を知らなかったので君に大変失敬した勘弁《かんべん》したまえと長々しい謝罪をした。　おれは何とも云わずに、山嵐の机の上にあった、一銭五｜厘《りん》をとって、おれの蝦蟇口《がまぐち》のなかへ入れた。山嵐は君それを引き込《こ》めるのかと不審《ふしん》そうに聞くから、うんおれは君に奢《おご》られるのが、いやだったから、是非返すつもりでいたが、その後だんだん考えてみると、やっぱり奢ってもらう方がいいようだから、引き込ますんだと説明した。山嵐は大きな声をしてアハハハと笑いながら、そんなら、なぜ早く取らなかったのだと聞いた。実は取ろう取ろうと思ってたが、何だか妙《みょう》だからそのままにしておいた。近来は学校へ来て一銭五厘を見るのが苦�
 �なるくらいいやだったと云ったら、君はよっぽど負け惜《お》しみの強い男だと云うから、君はよっぽど剛情張《ごうじょうっぱ》りだと答えてやった。それから二人の間にこんな問答が起《おこ》った。「君は一体どこの産だ」「おれは江戸《えど》っ子だ」「うん、江戸っ子か、道理で負け惜しみが強いと思った」「きみはどこだ」「僕は会津《あいづ》だ」「会津っぽか、強情な訳だ。今日の送別会へ行くのかい」「行くとも、君は？」「おれは無論行くんだ。古賀さんが立つ時は、浜《はま》まで見送りに行こうと思ってるくらいだ」「送別会は面白いぜ、出て見たまえ。今日は大いに飲むつもりだ」「勝手に飲むがいい。おれは肴《さかな》を食ったら、すぐ帰る。酒なんか飲む奴は馬鹿《ばか》だ」「君はすぐ喧嘩《けんか》
 を吹《ふ》き懸《か》ける男だ。なるほど江戸っ子の軽跳《けいちょう》な風を、よく、あらわしてる」「何でもいい、送別会へ行く前にちょっとおれのうちへお寄り、話《はな》しがあるから」　山嵐は約束《やくそく》通りおれの下宿へ寄った。おれはこの間から、うらなり君の顔を見る度に気の毒でたまらなかったが、いよいよ送別の今日となったら、何だか憐《あわ》れっぽくって、出来る事なら、おれが代りに行ってやりたい様な気がしだした。それで送別会の席上で、大いに演説でもしてその行を盛《さかん》にしてやりたいと思うのだが、おれのべらんめえ調子じゃ、到底《とうてい》物にならないから、大きな声を出す山嵐を雇《やと》って、一番赤シャツの荒肝《あらぎも》を挫《ひし》いでやろうと考え付いたから、�
 ��ざわざ山嵐を呼んだのである。　おれはまず冒頭《ぼうとう》としてマドンナ事件から説き出したが、山嵐は無論マドンナ事件はおれより詳《くわ》しく知っている。おれが野芹川《のぜりがわ》の土手の話をして、あれは馬鹿野郎《ばかやろう》だと云ったら、山嵐は君はだれを捕《つら》まえても馬鹿｜呼《よば》わりをする。今日学校で自分の事を馬鹿と云ったじゃないか。自分が馬鹿なら、赤シャツは馬鹿じゃない。自分は赤シャツの同類じゃないと主張した。それじゃ赤シャツは腑抜《ふぬ》けの呆助《ほうすけ》だと云ったら、そうかもしれないと山嵐は大いに賛成した。山嵐は強い事は強いが、こんな言葉になると、おれより遥《はる》かに字を知っていない。会津っぽなんてものはみんな、こんな、ものなんだろう。　�
 �れから増給事件と将来重く登用すると赤シャツが云った話をしたら山嵐はふふんと鼻から声を出して、それじゃ僕を免職《めんしょく》する考えだなと云った。免職するつもりだって、君は免職になる気かと聞いたら、誰《だれ》がなるものか、自分が免職になるなら、赤シャツもいっしょに免職させてやると大いに威張《いば》った。どうしていっしょに免職させる気かと押し返して尋《たず》ねたら、そこはまだ考えていないと答えた。山嵐は強そうだが、智慧《ちえ》はあまりなさそうだ。おれが増給を断《こと》わったと話したら、大将大きに喜んでさすが江戸っ子だ、えらいと賞《ほ》めてくれた。　うらなりが、そんなに厭《いや》がっているなら、なぜ留任の運動をしてやらなかったと聞いてみたら、うらなりから話を聞い
 た時は、既《すで》にきまってしまって、校長へ二度、赤シャツへ一度行って談判してみたが、どうする事も出来なかったと話した。それについても古賀があまり好人物過ぎるから困る。赤シャツから話があった時、断然断わるか、一応考えてみますと逃《に》げればいいのに、あの弁舌に胡魔化されて、即席《そくせき》に許諾《きょだく》したものだから、あとからお母《っか》さんが泣きついても、自分が談判に行っても役に立たなかったと非常に残念がった。　今度の事件は全く赤シャツが、うらなりを遠ざけて、マドンナを手に入れる策略なんだろうとおれが云ったら、無論そうに違いない。あいつは大人《おとな》しい顔をして、悪事を働いて、人が何か云うと、ちゃんと逃道《にげみち》を拵《こしら》えて待ってるんだか�
 ��、よっぽど奸物《かんぶつ》だ。あんな奴にかかっては鉄拳制裁《てっけんせいさい》でなくっちゃ利かないと、瘤《こぶ》だらけの腕《うで》をまくってみせた。おれはついでだから、君の腕は強そうだな柔術《じゅうじゅつ》でもやるかと聞いてみた。すると大将二の腕へ力瘤を入れて、ちょっと攫《つか》んでみろと云うから、指の先で揉《も》んでみたら、何の事はない湯屋にある軽石の様なものだ。　おれはあまり感心したから、君そのくらいの腕なら、赤シャツの五人や六人は一度に張り飛ばされるだろうと聞いたら、無論さと云いながら、曲げた腕を伸《の》ばしたり、縮ましたりすると、力瘤がぐるりぐるりと皮のなかで廻転《かいてん》する。すこぶる愉快《ゆかい》だ。山嵐の証明する所によると、かんじん綯《よ�
 �りを二本より合せて、この力瘤の出る所へ巻きつけて、うんと腕を曲げると、ぷつりと切れるそうだ。かんじんよりなら、おれにも出来そうだと云ったら、出来るものか、出来るならやってみろと来た。切れないと外聞がわるいから、おれは見合せた。　君どうだ、今夜の送別会に大いに飲んだあと、赤シャツと野だを撲《なぐ》ってやらないかと面白半分に勧めてみたら、山嵐はそうだなと考えていたが、今夜はまあよそうと云った。なぜと聞くと、今夜は古賀に気の毒だから――それにどうせ撲るくらいなら、あいつらの悪るい所を見届けて現場で撲らなくっちゃ、こっちの落度になるからと、分別のありそうな事を附加《つけた》した。山嵐でもおれよりは考えがあると見える。　じゃ演説をして古賀君を大いにほめてやれ、おれが
 すると江戸っ子のぺらぺらになって重みがなくていけない。そうして、きまった所へ出ると、急に溜飲《りゅういん》が起って咽喉《のど》の所へ、大きな丸《たま》が上がって来て言葉が出ないから、君に譲《ゆず》るからと云ったら、妙な病気だな、じゃ君は人中じゃ口は利けないんだね、困るだろう、と聞くから、何そんなに困りゃしないと答えておいた。　そうこうするうち時間が来たから、山嵐と一所に会場へ行く。会場は花晨亭《かしんてい》といって、当地《ここ》で第一等の料理屋だそうだが、おれは一度も足を入れた事がない。もとの家老とかの屋敷《やしき》を買い入れて、そのまま開業したという話だが、なるほど見懸《みかけ》からして厳《いか》めしい構えだ。家老の屋敷が料理屋になるのは、陣羽織《じんば�
 ��り》を縫《ぬ》い直して、胴着《どうぎ》にする様なものだ。　二人が着いた頃《ころ》には、人数《にんず》ももう大概揃《たいがいそろ》って、五十｜畳《じょう》の広間に二つ三つ人間の塊《かたまり》が出来ている。五十畳だけに床《とこ》は素敵に大きい。おれが山城屋で占領《せんりょう》した十五畳敷の床とは比較にならない。尺を取ってみたら二間あった。右の方に、赤い模様のある瀬戸物の瓶《かめ》を据《す》えて、その中に松《まつ》の大きな枝《えだ》が挿《さ》してある。松の枝を挿して何にする気か知らないが、何ヶ月立っても散る気遣いがないから、銭が懸らなくって、よかろう。あの瀬戸物はどこで出来るんだと博物の教師に聞いたら、あれは瀬戸物じゃありません、伊万里《いまり》ですと云った。�
 �万里だって瀬戸物じゃないかと、云ったら、博物はえへへへへと笑っていた。あとで聞いてみたら、瀬戸で出来る焼物だから、瀬戸と云うのだそうだ。おれは江戸っ子だから、陶器《とうき》の事を瀬戸物というのかと思っていた。床の真中に大きな懸物があって、おれの顔くらいな大きさな字が二十八字かいてある。どうも下手《へた》なものだ。あんまり不味《まず》いから、漢学の先生に、なぜあんなまずいものを麗々《れいれい》と懸けておくんですと尋《たず》ねたところ、先生はあれは海屋《かいおく》といって有名な書家のかいた者だと教えてくれた。海屋だか何だか、おれは今だに下手だと思っている。　やがて書記の川村がどうかお着席をと云うから、柱があって靠《よ》りかかるのに都合のいい所へ坐《すわ》った。
 海屋の懸物の前に狸《たぬき》が羽織《はおり》、袴《はかま》で着席すると、左に赤シャツが同じく羽織袴で陣取《じんど》った。右の方は主人公だというのでうらなり先生、これも日本服で控《ひか》えている。おれは洋服だから、かしこまるのが窮屈《きゅうくつ》だったから、すぐ胡坐《あぐら》をかいた。隣《とな》りの体操《たいそう》教師は黒ずぼん［＃「ずぼん」に傍点］で、ちゃんとかしこまっている。体操の教師だけにいやに修行が積んでいる。やがてお膳《ぜん》が出る。徳利《とくり》が並《なら》ぶ。幹事が立って、一言《いちごん》開会の辞を述べる。それから狸が立つ。赤シャツが起《た》つ。ことごとく送別の辞を述べたが、三人共申し合せたようにうらなり君の、良教師で好人物な事を吹聴《ふいちょ�
 ��》して、今回去られるのはまことに残念である、学校としてのみならず、個人として大いに惜しむところであるが、ご一身上のご都合で、切に転任をご希望になったのだから致《いた》し方《かた》がないという意味を述べた。こんな嘘《うそ》をついて送別会を開いて、それでちっとも恥《はず》かしいとも思っていない。ことに赤シャツに至って三人のうちで一番うらなり君をほめた。この良友を失うのは実に自分にとって大なる不幸であるとまで云った。しかもそのいい方がいかにも、もっともらしくって、例のやさしい声を一層やさしくして、述べ立てるのだから、始めて聞いたものは、誰でもきっとだまされるに極《きま》ってる。マドンナも大方この手で引掛《ひっか》けたんだろう。赤シャツが送別の辞を述べ立てている�
 �中、向側《むかいがわ》に坐っていた山嵐がおれの顔を見てちょっと稲光《いなびかり》をさした。おれは返電として、人指し指でべっかんこうをして見せた。　赤シャツが座に復するのを待ちかねて、山嵐がぬっと立ち上がったから、おれは嬉《うれ》しかったので、思わず手をぱちぱちと拍《う》った。すると狸を始め一同がことごとくおれの方を見たには少々困った。山嵐は何を云うかと思うとただ今校長始めことに教頭は古賀君の転任を非常に残念がられたが、私は少々反対で古賀君が一日《いちじつ》も早く当地を去られるのを希望しております。延岡は僻遠《へきえん》の地で、当地に比べたら物質上の不便はあるだろう。が、聞くところによれば風俗のすこぶる淳朴《じゅんぼく》な所で、職員生徒ことごとく上代樸直《じ
 ょうだいぼくちょく》の気風を帯びているそうである。心にもないお世辞を振《ふ》り蒔《ま》いたり、美しい顔をして君子を陥《おとしい》れたりするハイカラ野郎は一人もないと信ずるからして、君のごとき温良｜篤厚《とっこう》の士は必ずその地方一般の歓迎《かんげい》を受けられるに相違《そうい》ない。吾輩《わがはい》は大いに古賀君のためにこの転任を祝するのである。終りに臨んで君が延岡に赴任《ふにん》されたら、その地の淑女《しゅくじょ》にして、君子の好逑《こうきゅう》となるべき資格あるものを択《えら》んで一日《いちじつ》も早く円満なる家庭をかたち作って、かの不貞無節なるお転婆《てんば》を事実の上において慚死《ざんし》せしめん事を希望します。えへんえへんと二つばかり大きな咳払�
 ��せきばら》いをして席に着いた。おれは今度も手を叩《たた》こうと思ったが、またみんながおれの面《かお》を見るといやだから、やめにしておいた。山嵐が坐ると今度はうらなり先生が起った。先生はご鄭寧《ていねい》に、自席から、座敷の端《はし》の末座まで行って、慇懃《いんぎん》に一同に挨拶《あいさつ》をした上、今般は一身上の都合で九州へ参る事になりましたについて、諸先生方が小生のためにこの盛大《せいだい》なる送別会をお開き下さったのは、まことに感銘《かんめい》の至りに堪《た》えぬ次第で――ことにただ今は校長、教頭その他諸君の送別の辞を頂戴《ちょうだい》して、大いに難有《ありがた》く服膺《ふくよう》する訳であります。私はこれから遠方へ参りますが、なにとぞ従前の通りお見�
 �てなくご愛顧《あいこ》のほどを願います。とへえつく張って席に戻《もど》った。うらなり君はどこまで人が好いんだか、ほとんど底が知れない。自分がこんなに馬鹿にされている校長や、教頭に恭《うやうや》しくお礼を云っている。それも義理｜一遍《いっぺん》の挨拶ならだが、あの様子や、あの言葉つきや、あの顔つきから云うと、心《しん》から感謝しているらしい。こんな聖人に真面目にお礼を云われたら、気の毒になって、赤面しそうなものだが狸も赤シャツも真面目に謹聴《きんちょう》しているばかりだ。　挨拶が済んだら、あちらでもチュー、こちらでもチュー、という音がする。おれも真似をして汁《しる》を飲んでみたがまずいもんだ。口取《くちとり》に蒲鉾《かまぼこ》はついてるが、どす黒くて竹輪の出
 来損《できそこ》ないである。刺身《さしみ》も並んでるが、厚くって鮪《まぐろ》の切り身を生で食うと同じ事だ。それでも隣《とな》り近所の連中はむしゃむしゃ旨《うま》そうに食っている。大方江戸前の料理を食った事がないんだろう。　そのうち燗徳利《かんどくり》が頻繁《ひんぱん》に往来し始めたら、四方が急に賑《にぎ》やかになった。野だ公は恭しく校長の前へ出て盃《さかずき》を頂いてる。いやな奴だ。うらなり君は順々に献酬《けんしゅう》をして、一巡周《いちじゅんめぐ》るつもりとみえる。はなはだご苦労である。うらなり君がおれの前へ来て、一つ頂戴致しましょうと袴のひだを正して申し込まれたから、おれも窮屈にズボンのままかしこまって、一｜盃《ぱい》差し上げた。せっかく参って、すぐお�
 ��れになるのは残念ですね。ご出立《しゅったつ》はいつです、是非浜までお見送りをしましょうと云ったら、うらなり君はいえご用｜多《おお》のところ決してそれには及《およ》びませんと答えた。うらなり君が何と云ったって、おれは学校を休んで送る気でいる。　それから一時間ほどするうちに席上は大分乱れて来る。まあ一｜杯《ぱい》、おや僕が飲めと云うのに……などと呂律《ろれつ》の巡《まわ》りかねるのも一人二人《ひとりふたり》出来て来た。少々｜退屈《たいくつ》したから便所へ行って、昔風な庭を星明りにすかして眺《なが》めていると山嵐が来た。どうださっきの演説はうまかったろう。と大分得意である。大賛成だが一ヶ所気に入らないと抗議《こうぎ》を申し込んだら、どこが不賛成だと聞いた。「美�
 �い顔をして人を陥れるようなハイカラ野郎は延岡に居《お》らないから……と君は云ったろう」「うん」「ハイカラ野郎だけでは不足だよ」「じゃ何と云うんだ」「ハイカラ野郎の、ペテン師の、イカサマ師の、猫被《ねこっかぶ》りの、香具師《やし》の、モモンガーの、岡っ引きの、わんわん鳴けば犬も同然な奴とでも云うがいい」「おれには、そう舌は廻らない。君は能弁だ。第一単語を大変たくさん知ってる。それで演舌《えんぜつ》が出来ないのは不思議だ」「なにこれは喧嘩《けんか》のときに使おうと思って、用心のために取っておく言葉さ。演舌となっちゃ、こうは出ない」「そうかな、しかしぺらぺら出るぜ。もう一遍やって見たまえ」「何遍でもやるさいいか。――ハイカラ野郎のペテン師の、イカサマ師の……」と
 云いかけていると、椽側《えんがわ》をどたばた云わして、二人ばかり、よろよろしながら馳《か》け出して来た。「両君そりゃひどい、――逃げるなんて、――僕が居るうちは決して逃《にが》さない、さあのみたまえ。――いかさま師？――面白い、いかさま面白い。――さあ飲みたまえ」とおれと山嵐をぐいぐい引っ張って行く。実はこの両人共便所に来たのだが、酔《よ》ってるもんだから、便所へはいるのを忘れて、おれ等を引っ張るのだろう。酔っ払いは目の中《あた》る所へ用事を拵えて、前の事はすぐ忘れてしまうんだろう。「さあ、諸君、いかさま師を引っ張って来た。さあ飲ましてくれたまえ。いかさま師をうんと云うほど、酔わしてくれたまえ。君逃げちゃいかん」と逃げもせぬ、おれを壁際《かべぎわ》へ圧《お�
 ��し付けた。諸方を見廻してみると、膳の上に満足な肴の乗っているのは一つもない。自分の分を奇麗《きれい》に食い尽《つく》して、五六間先へ遠征《えんせい》に出た奴もいる。校長はいつ帰ったか姿が見えない。　ところへお座敷はこちら？　と芸者が三四人はいって来た。おれも少し驚《おど》ろいたが、壁際へ圧し付けられているんだから、じっとしてただ見ていた。すると今まで床柱《とこばしら》へもたれて例の琥珀《こはく》のパイプを自慢《じまん》そうに啣《くわ》えていた、赤シャツが急に起《た》って、座敷を出にかかった。向《むこ》うからはいって来た芸者の一人が、行き違いながら、笑って挨拶をした。その一人は一番若くて一番奇麗な奴だ。遠くで聞《きこ》えなかったが、おや今晩はぐらい云ったら�
 �い。赤シャツは知らん顔をして出て行ったぎり、顔を出さなかった。大方校長のあとを追懸《おいか》けて帰ったんだろう。　芸者が来たら座敷中急に陽気になって、一同が鬨《とき》の声を揚《あ》げて歓迎《かんげい》したのかと思うくらい、騒々《そうぞう》しい。そうしてある奴はなんこを攫《つか》む。その声の大きな事、まるで居合抜《いあいぬき》の稽古《けいこ》のようだ。こっちでは拳《けん》を打ってる。よっ、はっ、と夢中《むちゅう》で両手を振るところは、ダーク一座の操人形《あやつりにんぎょう》よりよっぽど上手《じょうず》だ。向うの隅《すみ》ではおいお酌《しゃく》だ、と徳利を振ってみて、酒だ酒だと言い直している。どうもやかましくて騒々しくってたまらない。そのうちで手持無沙汰《ても
 ちぶさた》に下を向いて考え込んでるのはうらなり君ばかりである。自分のために送別会を開いてくれたのは、自分の転任を惜《おし》んでくれるんじゃない。みんなが酒を呑《の》んで遊ぶためだ。自分独りが手持無沙汰で苦しむためだ。こんな送別会なら、開いてもらわない方がよっぽどましだ。　しばらくしたら、めいめい胴間声《どうまごえ》を出して何か唄《うた》い始めた。おれの前へ来た一人の芸者が、あんた、なんぞ、唄いなはれ、と三味線を抱《かか》えたから、おれは唄わない、貴様唄ってみろと云ったら、金《かね》や太鼓《たいこ》でねえ、迷子の迷子の三太郎と、どんどこ、どんのちゃんちきりん。叩いて廻って逢《あ》われるものならば、わたしなんぞも、金や太鼓でどんどこ、どんのちゃんちきりんと叩い�
 ��廻って逢いたい人がある、と二た息にうたって、おおしんどと云った。おおしんどなら、もっと楽なものをやればいいのに。　すると、いつの間にか傍《そば》へ来て坐った、野だが、鈴ちゃん逢いたい人に逢ったと思ったら、すぐお帰りで、お気の毒さまみたようでげすと相変らず噺《はな》し家みたような言葉使いをする。知りまへんと芸者はつんと済ました。野だは頓着《とんじゃく》なく、たまたま逢いは逢いながら……と、いやな声を出して義太夫《ぎだゆう》の真似《まね》をやる。おきなはれやと芸者は平手で野だの膝《ひざ》を叩いたら野だは恐悦《きょうえつ》して笑ってる。この芸者は赤シャツに挨拶をした奴だ。芸者に叩かれて笑うなんて、野だもおめでたい者だ。鈴ちゃん僕が紀伊《き》の国を踴《おど》るか�
 �、一つ弾《ひ》いて頂戴と云い出した。野だはこの上まだ踴る気でいる。　向うの方で漢学のお爺《じい》さんが歯のない口を歪《ゆが》めて、そりゃ聞えません伝兵衛《でんべい》さん、お前とわたしのその中は……とまでは無事に済《すま》したが、それから？　と芸者に聞いている。爺さんなんて物覚えのわるいものだ。一人が博物を捕《つら》まえて近頃《ちかごろ》こないなのが、でけましたぜ、弾いてみまほうか。よう聞いて、いなはれや――花月巻《かげつまき》、白いリボンのハイカラ頭、乗るは自転車、弾くはヴァイオリン、半可《はんか》の英語でぺらぺらと、I am glad to see you と唄うと、博物はなるほど面白い、英語入りだねと感心している。　山嵐は馬鹿に大きな声を出して、芸者、芸者と呼んで、おれが剣舞《け
 んぶ》をやるから、三味線を弾けと号令を下した。芸者はあまり乱暴な声なので、あっけに取られて返事もしない。山嵐は委細構わず、ステッキを持って来て、踏破千山万岳烟《ふみやぶるせんざんばんがくのけむり》と真中《まんなか》へ出て独りで隠《かく》し芸を演じている。ところへ野だがすでに紀伊《き》の国を済まして、かっぽれを済まして、棚《たな》の達磨《だるま》さんを済して丸裸《まるはだか》の越中褌《えっちゅうふんどし》一つになって、棕梠箒《しゅろぼうき》を小脇に抱《か》い込んで、日清談判｜破裂《はれつ》して……と座敷中練りあるき出した。まるで気違《きちが》いだ。　おれはさっきから苦しそうに袴も脱《ぬ》がず控えているうらなり君が気の毒でたまらなかったが、なんぼ自分の送別会だ�
 ��て、越中褌の裸踴《はだかおどり》まで羽織袴で我慢《がまん》してみている必要はあるまいと思ったから、そばへ行って、古賀さんもう帰りましょうと退去を勧めてみた。するとうらなり君は今日は私の送別会だから、私が先へ帰っては失礼です、どうぞご遠慮《えんりょ》なくと動く景色もない。なに構うもんですか、送別会なら、送別会らしくするがいいです、あの様をご覧なさい。気狂会《きちがいかい》です。さあ行きましょうと、進まないのを無理に勧めて、座敷を出かかるところへ、野だが箒を振り振り進行して来て、やご主人が先へ帰るとはひどい。日清談判だ。帰せないと箒を横にして行く手を塞《ふさ》いだ。おれはさっきから肝癪《かんしゃく》が起っているところだから、日清談判なら貴様はちゃんちゃんだろ�
 �と、いきなり拳骨《げんこつ》で、野だの頭をぽかりと喰《く》わしてやった。野だは二三秒の間毒気を抜かれた体《てい》で、ぼんやりしていたが、おやこれはひどい。お撲《ぶ》ちになったのは情ない。この吉川をご打擲《ちょうちゃく》とは恐れ入った。いよいよもって日清談判だ。とわからぬ事をならべているところへ、うしろから山嵐が何か騒動《そうどう》が始まったと見てとって、剣舞をやめて、飛んできたが、このていたらくを見て、いきなり頸筋《くびすじ》をうんと攫《つか》んで引き戻《もど》した。日清……いたい。いたい。どうもこれは乱暴だと振りもがくところを横に捩《ねじ》ったら、すとんと倒《たお》れた。あとはどうなったか知らない。途中《とちゅう》でうらなり君に別れて、うちへ帰ったら十一
 時過ぎだった。　　　　　十　祝勝会で学校はお休みだ。練兵場《れんぺいば》で式があるというので、狸《たぬき》は生徒を引率して参列しなくてはならない。おれも職員の一人《ひとり》としていっしょにくっついて行くんだ。町へ出ると日の丸だらけで、まぼしいくらいである。学校の生徒は八百人もあるのだから、体操の教師が隊伍《たいご》を整えて、一組一組の間を少しずつ明けて、それへ職員が一人か二人《ふたり》ずつ監督《かんとく》として割り込《こ》む仕掛《しか》けである。仕掛《しかけ》だけはすこぶる巧妙《こうみょう》なものだが、実際はすこぶる不手際である。生徒は小供《こども》の上に、生意気で、規律を破らなくっては生徒の体面にかかわると思ってる奴等《やつら》だから、職員が幾人《いくた�
 ��》ついて行ったって何の役に立つもんか。命令も下さないのに勝手な軍歌をうたったり、軍歌をやめるとワーと訳もないのに鬨《とき》の声を揚《あ》げたり、まるで浪人《ろうにん》が町内をねりあるいてるようなものだ。軍歌も鬨の声も揚げない時はがやがや何か喋舌《しゃべ》ってる。喋舌らないでも歩けそうなもんだが、日本人はみな口から先へ生れるのだから、いくら小言を云《い》ったって聞きっこない。喋舌るのもただ喋舌るのではない、教師のわる口を喋舌るんだから、下等だ。おれは宿直事件で生徒を謝罪さして、まあこれならよかろうと思っていた。ところが実際は大違《おおちが》いである。下宿の婆《ばあ》さんの言葉を借りて云えば、正に大違いの勘五郎《かんごろう》である。生徒があやまったのは心《し�
 �》から後悔《こうかい》してあやまったのではない。ただ校長から、命令されて、形式的に頭を下げたのである。商人が頭ばかり下げて、狡《ずる》い事をやめないのと一般で生徒も謝罪だけはするが、いたずらは決してやめるものでない。よく考えてみると世の中はみんなこの生徒のようなものから成立しているかも知れない。人があやまったり詫《わ》びたりするのを、真面目《まじめ》に受けて勘弁するのは正直過ぎる馬鹿《ばか》と云うんだろう。あやまるのも仮りにあやまるので、勘弁するのも仮りに勘弁するのだと思ってれば差《さ》し支《つか》えない。もし本当にあやまらせる気なら、本当に後悔するまで叩《たた》きつけなくてはいけない。　おれが組と組の間にはいって行くと、天麩羅《てんぷら》だの、団子《だん
 ご》だの、と云う声が絶えずする。しかも大勢だから、誰《だれ》が云うのだか分らない。よし分ってもおれの事を天麩羅と云ったんじゃありません、団子と申したのじゃありません、それは先生が神経衰弱《しんけいすいじゃく》だから、ひがんで、そう聞くんだぐらい云うに極《き》まってる。こんな卑劣《ひれつ》な根性は封建時代から、養成したこの土地の習慣なんだから、いくら云って聞かしたって、教えてやったって、到底《とうてい》直りっこない。こんな土地に一年も居ると、潔白なおれも、この真似《まね》をしなければならなく、なるかも知れない。向《むこ》うでうまく言い抜《ぬ》けられるような手段で、おれの顔を汚《よご》すのを抛《ほう》っておく、樗蒲一《ちょぼいち》はない。向こうが人ならおれも人�
 ��。生徒だって、子供だって、ずう体はおれより大きいや。だから刑罰《けいばつ》として何か返報をしてやらなくっては義理がわるい。ところがこっちから返報をする時分に尋常《じんじょう》の手段で行くと、向うから逆捩《さかねじ》を食わして来る。貴様がわるいからだと云うと、初手から逃《に》げ路《みち》が作ってある事だから滔々《とうとう》と弁じ立てる。弁じ立てておいて、自分の方を表向きだけ立派にしてそれからこっちの非を攻撃《こうげき》する。もともと返報にした事だから、こちらの弁護は向うの非が挙がらない上は弁護にならない。つまりは向うから手を出しておいて、世間体はこっちが仕掛けた喧嘩《けんか》のように、見傚《みな》されてしまう。大変な不利益だ。それなら向うのやるなり、愚迂多�
 �童子《ぐうたらどうじ》を極め込んでいれば、向うはますます増長するばかり、大きく云えば世の中のためにならない。そこで仕方がないから、こっちも向うの筆法を用いて捕《つら》まえられないで、手の付けようのない返報をしなくてはならなくなる。そうなっては江戸《えど》っ子も駄目《だめ》だ。駄目だが一年もこうやられる以上は、おれも人間だから駄目でも何でもそうならなくっちゃ始末がつかない。どうしても早く東京へ帰って清《きよ》といっしょになるに限る。こんな田舎《いなか》に居るのは堕落《だらく》しに来ているようなものだ。新聞配達をしたって、ここまで堕落するよりはましだ。　こう考えて、いやいや、附《つ》いてくると、何だか先鋒《せんぽう》が急にがやがや騒《さわ》ぎ出した。同時に列は
 ぴたりと留まる。変だから、列を右へはずして、向うを見ると、大手町《おおてまち》を突《つ》き当って薬師町《やくしまち》へ曲がる角の所で、行き詰《づま》ったぎり、押《お》し返したり、押し返されたりして揉《も》み合っている。前方から静かに静かにと声を涸《か》らして来た体操教師に何ですと聞くと、曲り角で中学校と師範《しはん》学校が衝突《しょうとつ》したんだと云う。　中学と師範とはどこの県下でも犬と猿《さる》のように仲がわるいそうだ。なぜだかわからないが、まるで気風が合わない。何かあると喧嘩をする。大方｜狭《せま》い田舎で退屈《たいくつ》だから、暇潰《ひまつぶ》しにやる仕事なんだろう。おれは喧嘩は好きな方だから、衝突と聞いて、面白半分に馳《か》け出して行った。すると�
 ��の方にいる連中は、しきりに何だ地方税の癖《くせ》に、引き込めと、怒鳴《どな》ってる。後ろからは押せ押せと大きな声を出す。おれは邪魔《じゃま》になる生徒の間をくぐり抜けて、曲がり角へもう少しで出ようとした時に、前へ！　と云う高く鋭《するど》い号令が聞《きこ》えたと思ったら師範学校の方は粛粛《しゅくしゅく》として行進を始めた。先を争った衝突は、折合がついたには相違《そうい》ないが、つまり中学校が一歩を譲《ゆず》ったのである。資格から云うと師範学校の方が上だそうだ。　祝勝の式はすこぶる簡単なものであった。旅団長が祝詞を読む、知事が祝詞を読む、参列者が万歳《ばんざい》を唱える。それでおしまいだ。余興は午後にあると云う話だから、ひとまず下宿へ帰って、こないだじゅう�
 �ら、気に掛《かか》っていた、清への返事をかきかけた。今度はもっと詳《くわ》しく書いてくれとの注文だから、なるべく念入《ねんいり》に認《したた》めなくっちゃならない。しかしいざとなって、半切《はんきれ》を取り上げると、書く事はたくさんあるが、何から書き出していいか、わからない。あれにしようか、あれは面倒臭《めんどうくさ》い。これにしようか、これはつまらない。何か、すらすらと出て、骨が折れなくって、そうして清が面白がるようなものはないかしらん、と考えてみると、そんな注文通りの事件は一つもなさそうだ。おれは墨《すみ》を磨《す》って、筆をしめして、巻紙を睨《にら》めて、――巻紙を睨めて、筆をしめして、墨を磨って――同じ所作を同じように何返も繰《く》り返したあと、お
 れには、とても手紙は書けるものではないと、諦《あきら》めて硯《すずり》の蓋《ふた》をしてしまった。手紙なんぞをかくのは面倒臭い。やっぱり東京まで出掛けて行って、逢《あ》って話をするのが簡便だ。清の心配は察しないでもないが、清の注文通りの手紙を書くのは三七日の断食《だんじき》よりも苦しい。　おれは筆と巻紙を抛《ほう》り出して、ごろりと転がって肱枕《ひじまくら》をして庭《にわ》の方を眺《なが》めてみたが、やっぱり清の事が気にかかる。その時おれはこう思った。こうして遠くへ来てまで、清の身の上を案じていてやりさえすれば、おれの真心《まこと》は清に通じるに違いない。通じさえすれば手紙なんぞやる必要はない。やらなければ無事で暮《くら》してると思ってるだろう。たよりは死�
 ��だ時か病気の時か、何か事の起った時にやりさえすればいい訳だ。　庭は十坪《とつぼ》ほどの平庭で、これという植木もない。ただ一本の蜜柑《みかん》があって、塀《へい》のそとから、目標《めじるし》になるほど高い。おれはうちへ帰ると、いつでもこの蜜柑を眺める。東京を出た事のないものには蜜柑の生《な》っているところはすこぶる珍《めずら》しいものだ。あの青い実がだんだん熟してきて、黄色になるんだろうが、定めて奇麗《きれい》だろう。今でももう半分色の変ったのがある。婆《ばあ》さんに聞いてみると、すこぶる水気の多い、旨《うま》い蜜柑だそうだ。今に熟《うれ》たら、たんと召《め》し上がれと云ったから、毎日少しずつ食ってやろう。もう三週間もしたら、充分《じゅうぶん》食えるだろう�
 �まさか三週間以内にここを去る事もなかろう。　おれが蜜柑の事を考えているところへ、偶然山嵐《ぐうぜんやまあらし》が話しにやって来た。今日は祝勝会だから、君といっしょにご馳走《ちそう》を食おうと思って牛肉を買って来たと、竹の皮の包《つつみ》を袂《たもと》から引きずり出して、座敷《ざしき》の真中《まんなか》へ抛り出した。おれは下宿で芋責《いもぜめ》豆腐責になってる上、蕎麦《そば》屋行き、団子《だんご》屋行きを禁じられてる際だから、そいつは結構だと、すぐ婆さんから鍋《なべ》と砂糖をかり込んで、煮方《にかた》に取りかかった。　山嵐は無暗《むやみ》に牛肉を頬張《ほおば》りながら、君あの赤シャツが芸者に馴染《なじみ》のある事を知ってるかと聞くから、知ってるとも、この間う
 らなりの送別会の時に来た一人がそうだろうと云ったら、そうだ僕《ぼく》はこの頃《ごろ》ようやく勘づいたのに、君はなかなか敏捷《びんしょう》だと大いにほめた。「あいつは、ふた言目には品性だの、精神的｜娯楽《ごらく》だのと云う癖《くせ》に、裏へ廻《まわ》って、芸者と関係なんかつけとる、怪《け》しからん奴《やつ》だ。それもほかの人が遊ぶのを寛容《かんよう》するならいいが、君が蕎麦屋へ行ったり、団子屋へはいるのさえ取締上《とりしまりじょう》害になると云って、校長の口を通して注意を加えたじゃないか」「うん、あの野郎の考えじゃ芸者買は精神的娯楽で、天麩羅や、団子は物理的娯楽なんだろう。精神的娯楽なら、もっと大べらにやるがいい。何だあの様《ざま》は。馴染の芸者がはいってく�
 ��と、入れ代りに席をはずして、逃げるなんて、どこまでも人を胡魔化《ごまか》す気だから気に食わない。そうして人が攻撃《こうげき》すると、僕は知らないとか、露西亜《ロシア》文学だとか、俳句が新体詩の兄弟分だとか云って、人を烟《けむ》に捲《ま》くつもりなんだ。あんな弱虫は男じゃないよ。全く御殿女中《ごてんじょちゅう》の生れ変りか何かだぜ。ことによると、あいつのおやじは湯島のかげま［＃「かげま」に傍点］かもしれない」「湯島のかげま［＃「かげま」に傍点］た何だ」「何でも男らしくないもんだろう。――君そこのところはまだ煮えていないぜ。そんなのを食うと絛虫《さなだむし》が湧《わ》くぜ」「そうか、大抵大丈夫《たいていだいじょうぶ》だろう。それで赤シャツは人に隠《かく》れて�
 �温泉《ゆ》の町の角屋《かどや》へ行って、芸者と会見するそうだ」「角屋って、あの宿屋か」「宿屋兼料理屋さ。だからあいつを一番へこますためには、あいつが芸者をつれて、あすこへはいり込むところを見届けておいて面詰《めんきつ》するんだね」「見届けるって、夜番《よばん》でもするのかい」「うん、角屋の前に枡屋《ますや》という宿屋があるだろう。あの表二階をかりて、障子《しょうじ》へ穴をあけて、見ているのさ」「見ているときに来るかい」「来るだろう。どうせひと晩じゃいけない。二週間ばかりやるつもりでなくっちゃ」「随分《ずいぶん》疲れるぜ。僕あ、おやじの死ぬとき一週間ばかり徹夜《てつや》して看病した事があるが、あとでぼんやりして、大いに弱った事がある」「少しぐらい身体が疲れた
 って構わんさ。あんな奸物《かんぶつ》をあのままにしておくと、日本のためにならないから、僕が天に代って誅戮《ちゅうりく》を加えるんだ」「愉快《ゆかい》だ。そう事が極まれば、おれも加勢してやる。それで今夜から夜番をやるのかい」「まだ枡屋に懸合《かけあ》ってないから、今夜は駄目だ」「それじゃ、いつから始めるつもりだい」「近々のうちやるさ。いずれ君に報知をするから、そうしたら、加勢してくれたまえ」「よろしい、いつでも加勢する。僕《ぼく》は計略《はかりごと》は下手《へた》だが、喧嘩とくるとこれでなかなかすばしこいぜ」　おれと山嵐がしきりに赤シャツ退治の計略《はかりごと》を相談していると、宿の婆さんが出て来て、学校の生徒さんが一人、堀田《ほった》先生にお目にかかりたい�
 ��てお出《い》でたぞなもし。今お宅へ参じたのじゃが、お留守《るす》じゃけれ、大方ここじゃろうてて捜《さが》し当ててお出でたのじゃがなもしと、閾《しきい》の所へ膝《ひざ》を突《つ》いて山嵐の返事を待ってる。山嵐はそうですかと玄関《げんかん》まで出て行ったが、やがて帰って来て、君、生徒が祝勝会の余興を見に行かないかって誘《さそ》いに来たんだ。今日は高知《こうち》から、何とか踴《おど》りをしに、わざわざここまで多人数《たにんず》乗り込んで来ているのだから、是非見物しろ、めったに見られない踴《おどり》だというんだ、君もいっしょに行ってみたまえと山嵐は大いに乗り気で、おれに同行を勧める。おれは踴なら東京でたくさん見ている。毎年｜八幡様《はちまんさま》のお祭りには屋台�
 �町内へ廻ってくるんだから汐酌《しおく》みでも何でもちゃんと心得ている。土佐っぽの馬鹿踴なんか、見たくもないと思ったけれども、せっかく山嵐が勧めるもんだから、つい行く気になって門へ出た。山嵐を誘いに来たものは誰かと思ったら赤シャツの弟だ。妙《みょう》な奴《やつ》が来たもんだ。　会場へはいると、回向院《えこういん》の相撲《すもう》か本門寺《ほんもんじ》の御会式《おえしき》のように幾旒《いくながれ》となく長い旗を所々に植え付けた上に、世界万国の国旗をことごとく借りて来たくらい、縄《なわ》から縄、綱《つな》から綱へ渡《わた》しかけて、大きな空が、いつになく賑《にぎ》やかに見える。東の隅《すみ》に一夜作りの舞台《ぶたい》を設けて、ここでいわゆる高知の何とか踴りをやる
 んだそうだ。舞台を右へ半町ばかりくると葭簀《よしず》の囲いをして、活花《いけばな》が陳列《ちんれつ》してある。みんなが感心して眺めているが、一向くだらないものだ。あんなに草や竹を曲げて嬉《うれ》しがるなら、背虫の色男や、跛《びっこ》の亭主《ていしゅ》を持って自慢《じまん》するがよかろう。　舞台とは反対の方面で、しきりに花火を揚げる。花火の中から風船が出た。帝国万歳《ていこくばんざい》とかいてある。天主の松の上をふわふわ飛んで営所の

<TRUNCATED>

[13/13] lucenenet git commit: Lucene.Net.Analysis.Phonetic: Added CLSCompliant(true) attribute

Posted by ni...@apache.org.

Lucene.Net.Analysis.Phonetic: Added CLSCompliant(true) attribute


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/2d5108ba
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/2d5108ba
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/2d5108ba

Branch: refs/heads/master
Commit: 2d5108ba060be2f06a6afaffeb0fedd124fe69bb
Parents: bacfcc1
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Jul 23 16:12:03 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Mon Jul 24 00:35:29 2017 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Analysis.Phonetic/Properties/AssemblyInfo.cs | 3 +++
 1 file changed, 3 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2d5108ba/src/Lucene.Net.Analysis.Phonetic/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Properties/AssemblyInfo.cs b/src/Lucene.Net.Analysis.Phonetic/Properties/AssemblyInfo.cs
index b7cd03f..022a4ea 100644
--- a/src/Lucene.Net.Analysis.Phonetic/Properties/AssemblyInfo.cs
+++ b/src/Lucene.Net.Analysis.Phonetic/Properties/AssemblyInfo.cs
@@ -19,6 +19,7 @@
  *
 */
 
+using System;
 using System.Reflection;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
@@ -34,6 +35,8 @@ using System.Runtime.InteropServices;
 [assembly: AssemblyDefaultAlias("Lucene.Net.Analysis.Phonetic")]
 [assembly: AssemblyCulture("")]
 
+[assembly: CLSCompliant(true)]
+
 // Setting ComVisible to false makes the types in this assembly not visible 
 // to COM components.  If you need to access a type in this assembly from 
 // COM, set the ComVisible attribute to true on that type.

[09/13] lucenenet git commit: Ported Lucene.Net.Analysis.Kuromoji + tests

Posted by ni...@apache.org.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizer.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizer.cs
new file mode 100644
index 0000000..4690549
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizer.cs
@@ -0,0 +1,1489 @@
+using Lucene.Net.Analysis.Ja.Dict;
+using Lucene.Net.Analysis.Ja.TokenAttributes;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Fst;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Threading;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tokenizer for Japanese that uses morphological analysis.
+    /// </summary>
+    /// <remarks>
+    /// This tokenizer sets a number of additional attributes:
+    /// <list type="bullet">
+    ///     <item><description><see cref="IBaseFormAttribute"/> containing base form for inflected adjectives and verbs.</description></item>
+    ///     <item><description><see cref="IPartOfSpeechAttribute"/> containing part-of-speech.</description></item>
+    ///     <item><description><see cref="IReadingAttribute"/> containing reading and pronunciation.</description></item>
+    ///     <item><description><see cref="IInflectionAttribute"/> containing additional part-of-speech information for inflected forms.</description></item>
+    /// </list>
+    /// <para/>
+    /// This tokenizer uses a rolling Viterbi search to find the 
+    /// least cost segmentation (path) of the incoming characters.
+    /// For tokens that appear to be compound (> length 2 for all
+    /// Kanji, or > length 7 for non-Kanji), we see if there is a
+    /// 2nd best segmentation of that token after applying
+    /// penalties to the long tokens.  If so, and the Mode is
+    /// <see cref="JapaneseTokenizerMode.SEARCH"/>, we output the alternate segmentation 
+    /// as well.
+    /// </remarks>
+    public sealed class JapaneseTokenizer : Tokenizer
+    {
+        // LUCENENET specific: de-nested Mode and renamed JapaneseTokenizerMode
+
+        /// <summary>
+        /// Default tokenization mode. Currently this is <see cref="JapaneseTokenizerMode.SEARCH"/>.
+        /// </summary>
+        public static readonly JapaneseTokenizerMode DEFAULT_MODE = JapaneseTokenizerMode.SEARCH;
+
+        // LUCENENET specific: de-nested Type and renamed JapaneseTokenizerType
+
+
+        private static readonly bool VERBOSE = false;
+
+        private static readonly int SEARCH_MODE_KANJI_LENGTH = 2;
+
+        private static readonly int SEARCH_MODE_OTHER_LENGTH = 7; // Must be >= SEARCH_MODE_KANJI_LENGTH
+
+        private static readonly int SEARCH_MODE_KANJI_PENALTY = 3000;
+
+        private static readonly int SEARCH_MODE_OTHER_PENALTY = 1700;
+
+        // For safety:
+        private static readonly int MAX_UNKNOWN_WORD_LENGTH = 1024;
+        private static readonly int MAX_BACKTRACE_GAP = 1024;
+
+        private readonly IDictionary<JapaneseTokenizerType, IDictionary> dictionaryMap = new Dictionary<JapaneseTokenizerType, IDictionary>();
+
+        private readonly TokenInfoFST fst;
+        private readonly TokenInfoDictionary dictionary;
+        private readonly UnknownDictionary unkDictionary;
+        private readonly ConnectionCosts costs;
+        private readonly UserDictionary userDictionary;
+        private readonly CharacterDefinition characterDefinition;
+
+        private readonly FST.Arc<long?> arc = new FST.Arc<long?>();
+        private readonly FST.BytesReader fstReader;
+        private readonly Int32sRef wordIdRef = new Int32sRef();
+
+        private readonly FST.BytesReader userFSTReader;
+        private readonly TokenInfoFST userFST;
+
+        private readonly RollingCharBuffer buffer = new RollingCharBuffer();
+
+        private readonly WrappedPositionArray positions = new WrappedPositionArray();
+
+        private readonly bool discardPunctuation;
+        private readonly bool searchMode;
+        private readonly bool extendedMode;
+        private readonly bool outputCompounds;
+
+        // Index of the last character of unknown word:
+        private int unknownWordEndIndex = -1;
+
+        // True once we've hit the EOF from the input reader:
+        private bool end;
+
+        // Last absolute position we backtraced from:
+        private int lastBackTracePos;
+
+        // Position of last token we returned; we use this to
+        // figure out whether to set posIncr to 0 or 1:
+        private int lastTokenPos;
+
+        // Next absolute position to process:
+        private int pos;
+
+        // Already parsed, but not yet passed to caller, tokens:
+        private readonly IList<Token> pending = new List<Token>();
+
+        private readonly ICharTermAttribute termAtt;
+        private readonly IOffsetAttribute offsetAtt;
+        private readonly IPositionIncrementAttribute posIncAtt;
+        private readonly IPositionLengthAttribute posLengthAtt;
+        private readonly IBaseFormAttribute basicFormAtt;
+        private readonly IPartOfSpeechAttribute posAtt;
+        private readonly IReadingAttribute readingAtt;
+        private readonly IInflectionAttribute inflectionAtt;
+
+        /// <summary>
+        /// Create a new <see cref="JapaneseTokenizer"/>.
+        /// <para/>
+        /// Uses <c>AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY</c> and otherwise behaves like the overload that takes a factory.
+        /// </summary>
+        /// <param name="input"><see cref="TextReader"/> containing the text to tokenize.</param>
+        /// <param name="userDictionary">Optional user dictionary; may be <c>null</c>.</param>
+        /// <param name="discardPunctuation"><c>true</c> if punctuation tokens should be dropped from the output.</param>
+        /// <param name="mode">Tokenization mode.</param>
+        public JapaneseTokenizer(TextReader input, UserDictionary userDictionary, bool discardPunctuation, JapaneseTokenizerMode mode)
+            : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, userDictionary, discardPunctuation, mode)
+        {
+        }
+
+        /// <summary>
+        /// Create a new <see cref="JapaneseTokenizer"/> using the supplied attribute factory.
+        /// </summary>
+        /// <param name="factory">The AttributeFactory used to create the token attributes.</param>
+        /// <param name="input">TextReader supplying the text to tokenize.</param>
+        /// <param name="userDictionary">Optional user dictionary; when <c>null</c>, no user FST is consulted.</param>
+        /// <param name="discardPunctuation"><c>true</c> if punctuation tokens should be dropped from the output.</param>
+        /// <param name="mode">Tokenization mode; any value other than SEARCH or EXTENDED disables search mode.</param>
+        public JapaneseTokenizer
+            (AttributeFactory factory, TextReader input, UserDictionary userDictionary, bool discardPunctuation, JapaneseTokenizerMode mode)
+            : base(factory, input)
+        {
+            // Register the attributes this tokenizer populates (order preserved):
+            termAtt = AddAttribute<ICharTermAttribute>();
+            offsetAtt = AddAttribute<IOffsetAttribute>();
+            posIncAtt = AddAttribute<IPositionIncrementAttribute>();
+            posLengthAtt = AddAttribute<IPositionLengthAttribute>();
+            basicFormAtt = AddAttribute<IBaseFormAttribute>();
+            posAtt = AddAttribute<IPartOfSpeechAttribute>();
+            readingAtt = AddAttribute<IReadingAttribute>();
+            inflectionAtt = AddAttribute<IInflectionAttribute>();
+
+            // Shared dictionaries and their lookup structures:
+            dictionary = TokenInfoDictionary.GetInstance();
+            fst = dictionary.FST;
+            unkDictionary = UnknownDictionary.GetInstance();
+            characterDefinition = unkDictionary.CharacterDefinition;
+            this.userDictionary = userDictionary;
+            costs = ConnectionCosts.GetInstance();
+            fstReader = fst.GetBytesReader();
+
+            // The user FST and its reader exist only when a user dictionary was given:
+            bool hasUserDictionary = userDictionary != null;
+            userFST = hasUserDictionary ? userDictionary.FST : null;
+            userFSTReader = hasUserDictionary ? userFST.GetBytesReader() : null;
+
+            this.discardPunctuation = discardPunctuation;
+
+            // Derive the three mode flags from the requested mode:
+            //   SEARCH   -> search mode, compounds are output
+            //   EXTENDED -> search mode + extended mode, no compounds
+            //   other    -> all flags off
+            bool isSearch = mode == JapaneseTokenizerMode.SEARCH;
+            bool isExtended = mode == JapaneseTokenizerMode.EXTENDED;
+            searchMode = isSearch || isExtended;
+            extendedMode = isExtended;
+            outputCompounds = isSearch;
+
+            buffer.Reset(m_input);
+
+            ResetState();
+
+            // Map each token type to the dictionary that resolves its word ids
+            // (the USER entry is null when no user dictionary was supplied):
+            dictionaryMap[JapaneseTokenizerType.KNOWN] = dictionary;
+            dictionaryMap[JapaneseTokenizerType.UNKNOWN] = unkDictionary;
+            dictionaryMap[JapaneseTokenizerType.USER] = userDictionary;
+        }
+
+        private GraphvizFormatter dotOut;
+
+        // LUCENENET specific - added getter and made into property
+        // so we can set this during object initialization.
+
+        /// <summary>
+        /// Expert: gets or sets the formatter used to produce Graphviz (dot)
+        /// output of the Viterbi lattice. Backed by the <c>dotOut</c> field.
+        /// </summary>
+        public GraphvizFormatter GraphvizFormatter
+        {
+            get { return this.dotOut; }
+            set { this.dotOut = value; }
+        }
+
+        /// <summary>
+        /// Disposes the base tokenizer and, for an explicit dispose, resets the
+        /// rolling character buffer against <c>m_input</c>.
+        /// </summary>
+        /// <param name="disposing"><c>true</c> when called from an explicit dispose; the buffer is only reset in that case.</param>
+        protected override void Dispose(bool disposing)
+        {
+            base.Dispose(disposing);
+            if (!disposing)
+            {
+                return;
+            }
+
+            buffer.Reset(m_input);
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            buffer.Reset(m_input); // point the rolling buffer at the current input reader
+            ResetState(); // clear parse state and re-add the BOS entry at position 0
+        }
+
+        /// <summary>
+        /// Returns all parse bookkeeping to its initial values, drops any pending
+        /// tokens, and seeds the lattice with the BOS entry at position 0.
+        /// </summary>
+        private void ResetState()
+        {
+            // Scalar state:
+            pos = 0;
+            end = false;
+            lastBackTracePos = 0;
+            lastTokenPos = -1;
+            unknownWordEndIndex = -1;
+
+            // Collections:
+            pending.Clear();
+            positions.Reset();
+
+            // Add BOS:
+            Position bos = positions.Get(0);
+            bos.Add(0, 0, -1, -1, -1, JapaneseTokenizerType.KNOWN);
+        }
+
+        public override void End()
+        {
+            base.End();
+            // Set final offset: start and end are both the corrected offset of pos (the next position to process)
+            int finalOffset = CorrectOffset(pos);
+            offsetAtt.SetOffset(finalOffset, finalOffset);
+        }
+
+        // Returns the added cost that a 2nd best segmentation is
+        // allowed to have.  I.e., if we see a path with cost X,
+        // ending in a compound word, and this method returns
+        // threshold > 0, then we will also find the 2nd best
+        // segmentation and if its path score is within this
+        // threshold of X, we'll include it in the output.
+        private int ComputeSecondBestThreshold(int pos, int length)
+        {
+            // TODO: maybe we do something else here, instead of just
+            // using the penalty...?  E.g. we can be more aggressive on
+            // when to also test for 2nd best path
+            return ComputePenalty(pos, length);
+        }
+
+        /// <summary>
+        /// Computes the length-based penalty for the word that starts at
+        /// <paramref name="pos"/> and spans <paramref name="length"/> characters.
+        /// All-kanji words longer than SEARCH_MODE_KANJI_LENGTH are penalized per
+        /// extra character with SEARCH_MODE_KANJI_PENALTY; other words longer than
+        /// SEARCH_MODE_OTHER_LENGTH use SEARCH_MODE_OTHER_PENALTY.
+        /// </summary>
+        /// <param name="pos">Absolute position of the first character of the word.</param>
+        /// <param name="length">Number of characters in the word.</param>
+        /// <returns>The penalty to add to the path cost, or 0 if none applies.</returns>
+        private int ComputePenalty(int pos, int length)
+        {
+            if (length <= SEARCH_MODE_KANJI_LENGTH)
+            {
+                // Too short to be penalized under either rule:
+                return 0;
+            }
+
+            // Scan until the first non-kanji character (if any):
+            bool sawNonKanji = false;
+            for (int offset = 0; offset < length && !sawNonKanji; offset++)
+            {
+                sawNonKanji = !characterDefinition.IsKanji((char)buffer.Get(pos + offset));
+            }
+
+            if (!sawNonKanji)
+            {
+                // Process only Kanji keywords
+                return (length - SEARCH_MODE_KANJI_LENGTH) * SEARCH_MODE_KANJI_PENALTY;
+            }
+
+            return length > SEARCH_MODE_OTHER_LENGTH
+                ? (length - SEARCH_MODE_OTHER_LENGTH) * SEARCH_MODE_OTHER_PENALTY
+                : 0;
+        }
+
+        // LUCENENET specific - de-nested Position class
+
+        /// <summary>
+        /// Adds an arc for word <paramref name="wordID"/> of <paramref name="dict"/> that starts at
+        /// <paramref name="fromPosData"/> and ends at <paramref name="endPos"/>, linked back to the
+        /// cheapest path arriving at the start position.
+        /// </summary>
+        /// <param name="dict">Dictionary providing the word cost and left/right ids.</param>
+        /// <param name="fromPosData">Position the word starts at; must have at least one incoming path.</param>
+        /// <param name="endPos">Absolute position just past the end of the word.</param>
+        /// <param name="wordID">Id of the word within <paramref name="dict"/>.</param>
+        /// <param name="type">Which kind of dictionary the word came from; USER words are never penalized.</param>
+        /// <param name="addPenalty"><c>true</c> to apply the length penalty regardless of the mode flags.</param>
+        private void Add(IDictionary dict, Position fromPosData, int endPos, int wordID, JapaneseTokenizerType type, bool addPenalty)
+        {
+            int wordCost = dict.GetWordCost(wordID);
+            int leftID = dict.GetLeftId(wordID);
+            Debug.Assert(fromPosData.count > 0);
+
+            // Pick the incoming path with the lowest (path cost + bigram cost);
+            // the word cost is the same for all of them and is added afterwards.
+            int bestIDX = -1;
+            int bestCost = int.MaxValue;
+            for (int i = 0; i < fromPosData.count; i++)
+            {
+                int bigramCost = costs.Get(fromPosData.lastRightID[i], leftID);
+                int candidate = fromPosData.costs[i] + bigramCost;
+                if (VERBOSE)
+                {
+                    Console.WriteLine("      fromIDX=" + i + ": cost=" + candidate + " (prevCost=" + fromPosData.costs[i] + " wordCost=" + wordCost + " bgCost=" + bigramCost + " leftID=" + leftID);
+                }
+                if (candidate >= bestCost)
+                {
+                    continue;
+                }
+                bestCost = candidate;
+                bestIDX = i;
+                if (VERBOSE)
+                {
+                    Console.WriteLine("        **");
+                }
+            }
+
+            bestCost += wordCost;
+
+            if (VERBOSE)
+            {
+                Console.WriteLine("      + cost=" + bestCost + " wordID=" + wordID + " leftID=" + leftID + " leastIDX=" + bestIDX + " toPos=" + endPos + " toPos.idx=" + positions.Get(endPos).count);
+            }
+
+            // Penalize long words when asked to, or when in search mode without
+            // compound output; user dictionary entries are exempt.
+            bool penalize = type != JapaneseTokenizerType.USER
+                && (addPenalty || (searchMode && !outputCompounds));
+            if (penalize)
+            {
+                int startPos = fromPosData.pos;
+                int penalty = ComputePenalty(startPos, endPos - startPos);
+                if (VERBOSE && penalty > 0)
+                {
+                    Console.WriteLine("        + penalty=" + penalty + " cost=" + (bestCost + penalty));
+                }
+                bestCost += penalty;
+            }
+
+            // The Java original passed dict.getRightId(wordID) here; the ids are
+            // asserted equal, so the already-fetched leftID is reused.
+            Debug.Assert(leftID == dict.GetRightId(wordID));
+            positions.Get(endPos).Add(bestCost, leftID, fromPosData.pos, bestIDX, wordID, type);
+        }
+
+        /// <summary>
+        /// Advances the stream to the next token, running the Viterbi parse
+        /// forward as needed until at least one token is pending.
+        /// </summary>
+        /// <returns>
+        /// <c>true</c> if the attributes were populated with a new token;
+        /// <c>false</c> if the end of the input was reached with no tokens pending.
+        /// </returns>
+        public override bool IncrementToken()
+        {
+            // Parse() is able to return w/o producing any new
+            // tokens, when the tokens it had produced were entirely
+            // punctuation.  So we loop here until we get a real
+            // token or we end:
+            while (pending.Count == 0)
+            {
+                if (end)
+                {
+                    return false;
+                }
+
+                // Push Viterbi forward some more:
+                Parse();
+            }
+
+            // Pop the last pending token by index.  The loop above guarantees
+            // the list is non-empty, so no null check is needed, and RemoveAt
+            // removes exactly that slot instead of searching the list from the
+            // front for the first equal element as Remove(token) would.
+            int lastIndex = pending.Count - 1;
+            Token token = pending[lastIndex];
+            pending.RemoveAt(lastIndex);
+
+            int position = token.Position;
+            int length = token.Length;
+            ClearAttributes();
+            Debug.Assert(length > 0);
+            termAtt.CopyBuffer(token.SurfaceForm, token.Offset, length);
+            offsetAtt.SetOffset(CorrectOffset(position), CorrectOffset(position + length));
+            basicFormAtt.SetToken(token);
+            posAtt.SetToken(token);
+            readingAtt.SetToken(token);
+            inflectionAtt.SetToken(token);
+            if (token.Position == lastTokenPos)
+            {
+                // Same start position as the previous token: stack it
+                // (posIncr 0) and report its own position length.
+                posIncAtt.PositionIncrement = 0;
+                posLengthAtt.PositionLength = token.PositionLength;
+            }
+            else
+            {
+                Debug.Assert(token.Position > lastTokenPos);
+                posIncAtt.PositionIncrement = 1;
+                posLengthAtt.PositionLength = 1;
+            }
+            if (VERBOSE)
+            {
+                Console.WriteLine(Thread.CurrentThread.Name + ":    incToken: return token=" + token);
+            }
+            lastTokenPos = token.Position;
+            return true;
+        }
+
+        /// <summary>
+        /// Incrementally parse some more characters.  This runs
+        /// the Viterbi search forwards "enough" so that we
+        /// generate some more tokens.  How much forward depends on
+        /// the chars coming in, since some chars could cause
+        /// longer-lasting ambiguity in the parsing.  Once the
+        /// ambiguity is resolved, then we back trace, produce
+        /// the pending tokens, and return.
+        /// </summary>
+        private void Parse()
+        {
+            if (VERBOSE)
+            {
+                Console.WriteLine("\nPARSE");
+            }
+
+            // Advances over each position (character):
+            while (true)
+            {
+
+                if (buffer.Get(pos) == -1)
+                {
+                    // End of input: buffer.Get returned -1
+                    break;
+                }
+
+                Position posData = positions.Get(pos);
+                bool isFrontier = positions.GetNextPos() == pos + 1;
+
+                if (posData.count == 0)
+                {
+                    // No arcs arrive here; move to next position:
+                    if (VERBOSE)
+                    {
+                        Console.WriteLine("    no arcs in; skip pos=" + pos);
+                    }
+                    pos++;
+                    continue;
+                }
+
+                if (pos > lastBackTracePos && posData.count == 1 && isFrontier)
+                {
+                    // isFrontier was computed above as positions.GetNextPos() == pos + 1.
+                    // We are at a "frontier", and only one node is
+                    // alive, so whatever the eventual best path is must
+                    // come through this node.  So we can safely commit
+                    // to the prefix of the best path at this point:
+                    Backtrace(posData, 0);
+
+                    // Re-base cost so we don't risk int overflow:
+                    posData.costs[0] = 0;
+
+                    if (pending.Count != 0)
+                    {
+                        return;
+                    }
+                    else
+                    {
+                        // This means the backtrace only produced
+                        // punctuation tokens, so we must keep parsing.
+                    }
+                }
+
+                if (pos - lastBackTracePos >= MAX_BACKTRACE_GAP)
+                {
+                    // Safety: if we've buffered too much, force a
+                    // backtrace now.  We find the least-cost partial
+                    // path, across all paths, backtrace from it, and
+                    // then prune all others.  Note that this, in
+                    // general, can produce the wrong result, if the
+                    // total best path did not in fact back trace
+                    // through this partial best path.  But it's the
+                    // best we can do... (short of not having a
+                    // safety!).
+
+                    // First pass: find least cost partial path so far,
+                    // including ending at future positions:
+                    int leastIDX = -1;
+                    int leastCost = int.MaxValue;
+                    Position leastPosData = null;
+                    for (int pos2 = pos; pos2 < positions.GetNextPos(); pos2++)
+                    {
+                        Position posData2 = positions.Get(pos2);
+                        for (int idx = 0; idx < posData2.count; idx++)
+                        {
+                            //System.out.println("    idx=" + idx + " cost=" + cost);
+                            int cost = posData2.costs[idx];
+                            if (cost < leastCost)
+                            {
+                                leastCost = cost;
+                                leastIDX = idx;
+                                leastPosData = posData2;
+                            }
+                        }
+                    }
+
+                    // We will always have at least one live path:
+                    Debug.Assert(leastIDX != -1);
+
+                    // Second pass: prune all but the best path:
+                    for (int pos2 = pos; pos2 < positions.GetNextPos(); pos2++)
+                    {
+                        Position posData2 = positions.Get(pos2);
+                        if (posData2 != leastPosData)
+                        {
+                            posData2.Reset();
+                        }
+                        else
+                        {
+                            if (leastIDX != 0)
+                            {
+                                posData2.costs[0] = posData2.costs[leastIDX];
+                                posData2.lastRightID[0] = posData2.lastRightID[leastIDX];
+                                posData2.backPos[0] = posData2.backPos[leastIDX];
+                                posData2.backIndex[0] = posData2.backIndex[leastIDX];
+                                posData2.backID[0] = posData2.backID[leastIDX];
+                                posData2.backType[0] = posData2.backType[leastIDX];
+                            }
+                            posData2.count = 1;
+                        }
+                    }
+
+                    Backtrace(leastPosData, 0);
+
+                    // Re-base cost so we don't risk int overflow:
+                    Arrays.Fill(leastPosData.costs, 0, leastPosData.count, 0);
+
+                    if (pos != leastPosData.pos)
+                    {
+                        // We jumped into a future position:
+                        Debug.Assert(pos < leastPosData.pos);
+                        pos = leastPosData.pos;
+                    }
+
+                    if (pending.Count != 0)
+                    {
+                        return;
+                    }
+                    else
+                    {
+                        // This means the backtrace only produced
+                        // punctuation tokens, so we must keep parsing.
+                        continue;
+                    }
+                }
+
+                if (VERBOSE)
+                {
+                    Console.WriteLine("\n  extend @ pos=" + pos + " char=" + (char)buffer.Get(pos));
+                }
+
+                if (VERBOSE)
+                {
+                    Console.WriteLine("    " + posData.count + " arcs in");
+                }
+
+                bool anyMatches = false;
+
+                // First try user dict:
+                if (userFST != null)
+                {
+                    userFST.GetFirstArc(arc);
+                    int output = 0;
+                    for (int posAhead = posData.pos; ; posAhead++)
+                    {
+                        int ch = buffer.Get(posAhead);
+                        if (ch == -1)
+                        {
+                            break;
+                        }
+                        if (userFST.FindTargetArc(ch, arc, arc, posAhead == posData.pos, userFSTReader) == null)
+                        {
+                            break;
+                        }
+                        output += (int)arc.Output;
+                        if (arc.IsFinal)
+                        {
+                            if (VERBOSE)
+                            {
+                                Console.WriteLine("    USER word " + new string(buffer.Get(pos, posAhead - pos + 1)) + " toPos=" + (posAhead + 1));
+                            }
+                            Add(userDictionary, posData, posAhead + 1, output + (int)arc.NextFinalOutput, JapaneseTokenizerType.USER, false);
+                            anyMatches = true;
+                        }
+                    }
+                }
+
+                // TODO: we can be more aggressive about user
+                // matches?  if we are "under" a user match then don't
+                // extend KNOWN/UNKNOWN paths?
+
+                if (!anyMatches)
+                {
+                    // Next, try known dictionary matches
+                    fst.GetFirstArc(arc);
+                    int output = 0;
+
+                    for (int posAhead = posData.pos; ; posAhead++)
+                    {
+                        int ch = buffer.Get(posAhead);
+                        if (ch == -1)
+                        {
+                            break;
+                        }
+                        //System.out.println("    match " + (char) ch + " posAhead=" + posAhead);
+
+                        if (fst.FindTargetArc(ch, arc, arc, posAhead == posData.pos, fstReader) == null)
+                        {
+                            break;
+                        }
+
+                        output += (int)arc.Output;
+
+                        // Optimization: for known words that are too-long
+                        // (compound), we should pre-compute the 2nd
+                        // best segmentation and store it in the
+                        // dictionary instead of recomputing it each time a
+                        // match is found.
+
+                        if (arc.IsFinal)
+                        {
+                            dictionary.LookupWordIds(output + (int)arc.NextFinalOutput, wordIdRef);
+                            if (VERBOSE)
+                            {
+                                Console.WriteLine("    KNOWN word " + new string(buffer.Get(pos, posAhead - pos + 1)) + " toPos=" + (posAhead + 1) + " " + wordIdRef.Length + " wordIDs");
+                            }
+                            for (int ofs = 0; ofs < wordIdRef.Length; ofs++)
+                            {
+                                Add(dictionary, posData, posAhead + 1, wordIdRef.Int32s[wordIdRef.Offset + ofs], JapaneseTokenizerType.KNOWN, false);
+                                anyMatches = true;
+                            }
+                        }
+                    }
+                }
+
+                // In normal (non-search) mode, unknown words are not processed greedily: skip positions before the end of the last unknown word.
+
+                if (!searchMode && unknownWordEndIndex > posData.pos)
+                {
+                    pos++;
+                    continue;
+                }
+
+                char firstCharacter = (char)buffer.Get(pos);
+                if (!anyMatches || characterDefinition.IsInvoke(firstCharacter))
+                {
+
+                    // Find unknown match:
+                    int characterId = characterDefinition.GetCharacterClass(firstCharacter);
+                    bool isPunct = IsPunctuation(firstCharacter);
+
+                    // NOTE: copied from UnknownDictionary.lookup:
+                    int unknownWordLength;
+                    if (!characterDefinition.IsGroup(firstCharacter))
+                    {
+                        unknownWordLength = 1;
+                    }
+                    else
+                    {
+                        // Extract unknown word. Characters with the same character class are considered to be part of unknown word
+                        unknownWordLength = 1;
+                        for (int posAhead = pos + 1; unknownWordLength < MAX_UNKNOWN_WORD_LENGTH; posAhead++)
+                        {
+                            int ch = buffer.Get(posAhead);
+                            if (ch == -1)
+                            {
+                                break;
+                            }
+                            if (characterId == characterDefinition.GetCharacterClass((char)ch) &&
+                                IsPunctuation((char)ch) == isPunct)
+                            {
+                                unknownWordLength++;
+                            }
+                            else
+                            {
+                                break;
+                            }
+                        }
+                    }
+
+                    unkDictionary.LookupWordIds(characterId, wordIdRef); // all characters of the unknown word share the first character's class
+                    if (VERBOSE)
+                    {
+                        Console.WriteLine("    UNKNOWN word len=" + unknownWordLength + " " + wordIdRef.Length + " wordIDs");
+                    }
+                    for (int ofs = 0; ofs < wordIdRef.Length; ofs++)
+                    {
+                        Add(unkDictionary, posData, posData.pos + unknownWordLength, wordIdRef.Int32s[wordIdRef.Offset + ofs], JapaneseTokenizerType.UNKNOWN, false);
+                    }
+
+                    unknownWordEndIndex = posData.pos + unknownWordLength;
+                }
+
+                pos++;
+            }
+
+            end = true;
+
+            if (pos > 0)
+            {
+
+                Position endPosData = positions.Get(pos);
+                int leastCost = int.MaxValue;
+                int leastIDX = -1;
+                if (VERBOSE)
+                {
+                    Console.WriteLine("  end: " + endPosData.count + " nodes");
+                }
+                for (int idx = 0; idx < endPosData.count; idx++)
+                {
+                    // Add EOS cost:
+                    int cost = endPosData.costs[idx] + costs.Get(endPosData.lastRightID[idx], 0);
+                    //System.out.println("    idx=" + idx + " cost=" + cost + " (pathCost=" + endPosData.costs[idx] + " bgCost=" + costs.get(endPosData.lastRightID[idx], 0) + ") backPos=" + endPosData.backPos[idx]);
+                    if (cost < leastCost)
+                    {
+                        leastCost = cost;
+                        leastIDX = idx;
+                    }
+                }
+
+                Backtrace(endPosData, leastIDX);
+            }
+            else
+            {
+                // No characters in the input string; return no tokens!
+            }
+        }
+
+        // Eliminates arcs from the lattice that are compound
+        // tokens (have a penalty) or are not congruent with the
+        // compound token we've matched (ie, span across the
+        // startPos).  This should be fairly efficient, because we
+        // just keep the already intersected structure of the
+        // graph, eg we don't have to consult the FSTs again:
+
+        private void PruneAndRescore(int startPos, int endPos, int bestStartIDX)
+        {
+            // Works on the lattice positions startPos..endPos in two sweeps:
+            //  1. right to left, every back pointer that starts at or after
+            //     startPos is mirrored as a forward arc on its origin position,
+            //     and the arriving arcs of each visited position are cleared;
+            //  2. left to right, the forward arcs are replayed to rebuild the
+            //     arriving arcs, starting from the single arc bestStartIDX at
+            //     startPos.
+            if (VERBOSE)
+            {
+                Console.WriteLine("  pruneAndRescore startPos=" + startPos + " endPos=" + endPos + " bestStartIDX=" + bestStartIDX);
+            }
+
+            // Sweep 1: endPos down to (but excluding) startPos.
+            int cursor = endPos;
+            while (cursor > startPos)
+            {
+                Position target = positions.Get(cursor);
+                if (VERBOSE)
+                {
+                    Console.WriteLine("    back pos=" + cursor);
+                }
+                for (int arc = 0; arc < target.count; arc++)
+                {
+                    int origin = target.backPos[arc];
+                    if (origin < startPos)
+                    {
+                        // The arc begins before startPos: drop it.
+                        if (VERBOSE)
+                        {
+                            Console.WriteLine("      prune");
+                        }
+                        continue;
+                    }
+
+                    // The arc lies inside the window: remember it on its origin.
+                    positions.Get(origin).AddForward(cursor,
+                                                     arc,
+                                                     target.backID[arc],
+                                                     target.backType[arc]);
+                }
+
+                // cursor is always > startPos here, so the arriving arcs of this
+                // position are always cleared; sweep 2 repopulates them.
+                target.count = 0;
+                cursor--;
+            }
+
+            // Sweep 2: startPos up to (but excluding) endPos.
+            for (int from = startPos; from < endPos; from++)
+            {
+                Position source = positions.Get(from);
+                if (VERBOSE)
+                {
+                    Console.WriteLine("    forward pos=" + from + " count=" + source.forwardCount);
+                }
+
+                if (source.count == 0)
+                {
+                    // Nothing arrives at this position, so nothing can leave it.
+                    if (VERBOSE)
+                    {
+                        Console.WriteLine("      skip");
+                    }
+                    source.forwardCount = 0;
+                    continue;
+                }
+
+                if (from != startPos)
+                {
+                    // Interior position: let Add pick the best arriving arc for
+                    // every forward arc.
+                    for (int i = 0; i < source.forwardCount; i++)
+                    {
+                        JapaneseTokenizerType arcType = source.forwardType[i];
+                        int toPos = source.forwardPos[i];
+                        if (VERBOSE)
+                        {
+                            Console.WriteLine("      + " + arcType + " word " + new string(buffer.Get(from, toPos - from)) + " toPos=" + toPos);
+                        }
+                        Add(GetDict(arcType),
+                            source,
+                            toPos,
+                            source.forwardID[i],
+                            arcType,
+                            true);
+                    }
+                }
+                else
+                {
+                    // First position: only the arc bestStartIDX is used as the
+                    // left context, which keeps the re-scored paths congruent
+                    // with the path that matched the compound token.
+                    int rightID = (startPos == 0)
+                        ? 0
+                        : GetDict(source.backType[bestStartIDX]).GetRightId(source.backID[bestStartIDX]);
+                    int pathCost = source.costs[bestStartIDX];
+
+                    for (int i = 0; i < source.forwardCount; i++)
+                    {
+                        JapaneseTokenizerType arcType = source.forwardType[i];
+                        IDictionary forwardDict = GetDict(arcType);
+                        int wordID = source.forwardID[i];
+                        int toPos = source.forwardPos[i];
+
+                        int wordCost = forwardDict.GetWordCost(wordID);
+                        int connectionCost = costs.Get(rightID, forwardDict.GetLeftId(wordID));
+                        int penalty = ComputePenalty(from, toPos - from);
+                        int newCost = pathCost + wordCost + connectionCost + penalty;
+
+                        if (VERBOSE)
+                        {
+                            Console.WriteLine("      + " + arcType + " word " + new string(buffer.Get(from, toPos - from)) + " toPos=" + toPos + " cost=" + newCost + " penalty=" + ComputePenalty(from, toPos - from) + " toPos.idx=" + positions.Get(toPos).count);
+                        }
+
+                        positions.Get(toPos).Add(newCost,
+                                                 forwardDict.GetRightId(wordID),
+                                                 from,
+                                                 bestStartIDX,
+                                                 wordID,
+                                                 arcType);
+                    }
+                }
+
+                // The forward arcs of this position are consumed.
+                source.forwardCount = 0;
+            }
+        }
+
+        // Backtrace from the provided position, back to the last
+        // time we back-traced, accumulating the resulting tokens to
+        // the pending list.  The pending list is then in-reverse
+        // (last token should be returned first).
+        //
+        // endPosData: the lattice position the trace starts from.
+        // fromIDX:    index of the arriving arc at endPosData to follow first.
+        //
+        // On return lastBackTracePos equals endPosData.pos and both circular
+        // buffers (buffer, positions) have been told to free everything before it.
+        private void Backtrace(Position endPosData, int fromIDX)
+        {
+            int endPos = endPosData.pos;
+
+            if (VERBOSE)
+            {
+                Console.WriteLine("\n  backtrace: endPos=" + endPos + " pos=" + this.pos + "; " + (this.pos - lastBackTracePos) + " characters; last=" + lastBackTracePos + " cost=" + endPosData.costs[fromIDX]);
+            }
+
+            // Characters requested from buffer starting at lastBackTracePos with
+            // length endPos - lastBackTracePos; every Token created below refers
+            // to this array using an offset relative to lastBackTracePos.
+            char[] fragment = buffer.Get(lastBackTracePos, endPos - lastBackTracePos);
+
+            // Optional observer hook; only invoked when dotOut has been set.
+            if (dotOut != null)
+            {
+                dotOut.OnBacktrace(this, positions, lastBackTracePos, endPosData, fromIDX, fragment, end);
+            }
+
+            // Local cursor; shadows the field this.pos for the rest of the method.
+            int pos = endPos;
+            int bestIDX = fromIDX;
+            // Compound token held back while its decompounded alternative is
+            // being traced; null when no alternative path is in progress.
+            Token altToken = null;
+
+            // We trace backwards, so this will be the leftWordID of
+            // the token after the one we are now on:
+            // (-1 means no token has been traced yet in this call.)
+            int lastLeftWordID = -1;
+
+            // Number of tokens added to pending so far; reset to 0 when the
+            // trace is redirected to an alternate path, and used to set
+            // altToken.PositionLength when that path rejoins.
+            int backCount = 0;
+
+            // TODO: sort of silly to make Token instances here; the
+            // back trace has all info needed to generate the
+            // token.  So, we could just directly set the attrs,
+            // from the backtrace, in incrementToken w/o ever
+            // creating Token; we'd have to defer calling freeBefore
+            // until after the backtrace was fully "consumed" by
+            // incrementToken.
+
+            // Follow back pointers until the previous backtrace position is reached.
+            while (pos > lastBackTracePos)
+            {
+                //System.out.println("BT: back pos=" + pos + " bestIDX=" + bestIDX);
+                Position posData = positions.Get(pos);
+                Debug.Assert(bestIDX < posData.count);
+
+                // Decode the arc being followed: where it starts, which
+                // dictionary entry it is, and which arc to follow next.
+                int backPos = posData.backPos[bestIDX];
+                Debug.Assert(backPos >= lastBackTracePos, "backPos=" + backPos + " vs lastBackTracePos=" + lastBackTracePos);
+                int length = pos - backPos;
+                JapaneseTokenizerType backType = posData.backType[bestIDX];
+                int backID = posData.backID[bestIDX];
+                int nextBestIDX = posData.backIndex[bestIDX];
+
+                // Only look for a decompounded alternative when compounds are
+                // output in search mode, no alternative is already pending, and
+                // the current token did not come from the user dictionary.
+                if (outputCompounds && searchMode && altToken == null && backType != JapaneseTokenizerType.USER)
+                {
+
+                    // In searchMode, if best path had picked a too-long
+                    // token, we use the "penalty" to compute the allowed
+                    // max cost of an alternate back-trace.  If we find an
+                    // alternate back trace with cost below that
+                    // threshold, we pursue it instead (but also output
+                    // the long token).
+                    //System.out.println("    2nd best backPos=" + backPos + " pos=" + pos);
+
+                    int penalty = ComputeSecondBestThreshold(backPos, pos - backPos);
+
+                    // A non-positive threshold means no alternative is attempted.
+                    if (penalty > 0)
+                    {
+                        if (VERBOSE)
+                        {
+                            Console.WriteLine("  compound=" + new string(buffer.Get(backPos, pos - backPos)) + " backPos=" + backPos + " pos=" + pos + " penalty=" + penalty + " cost=" + posData.costs[bestIDX] + " bestIDX=" + bestIDX + " lastLeftID=" + lastLeftWordID);
+                        }
+
+                        // Use the penalty to set maxCost on the 2nd best
+                        // segmentation:
+                        int maxCost = posData.costs[bestIDX] + penalty;
+                        if (lastLeftWordID != -1)
+                        {
+                            // Include the connection cost to the token on the right.
+                            maxCost += costs.Get(GetDict(backType).GetRightId(backID), lastLeftWordID);
+                        }
+
+                        // Now, prune all too-long tokens from the graph:
+                        // (this rewrites the arcs arriving at posData, so
+                        // posData.count and its arrays change after this call.)
+                        PruneAndRescore(backPos, pos,
+                                        posData.backIndex[bestIDX]);
+
+                        // Finally, find 2nd best back-trace and resume
+                        // backtrace there:
+                        int leastCost = int.MaxValue;
+                        int leastIDX = -1;
+                        for (int idx = 0; idx < posData.count; idx++)
+                        {
+                            int cost = posData.costs[idx];
+                            //System.out.println("    idx=" + idx + " prevCost=" + cost);
+
+                            if (lastLeftWordID != -1)
+                            {
+                                cost += costs.Get(GetDict(posData.backType[idx]).GetRightId(posData.backID[idx]),
+                                                  lastLeftWordID);
+                                //System.out.println("      += bgCost=" + costs.get(getDict(posData.backType[idx]).getRightId(posData.backID[idx]),
+                                //lastLeftWordID) + " -> " + cost);
+                            }
+                            //System.out.println("penalty " + posData.backPos[idx] + " to " + pos);
+                            //cost += computePenalty(posData.backPos[idx], pos - posData.backPos[idx]);
+                            if (cost < leastCost)
+                            {
+                                //System.out.println("      ** ");
+                                leastCost = cost;
+                                leastIDX = idx;
+                            }
+                        }
+                        //System.out.println("  leastIDX=" + leastIDX);
+
+                        if (VERBOSE)
+                        {
+                            Console.WriteLine("  afterPrune: " + posData.count + " arcs arriving; leastCost=" + leastCost + " vs threshold=" + maxCost + " lastLeftWordID=" + lastLeftWordID);
+                        }
+
+                        // Take the alternative only if one exists, it is within
+                        // the threshold, and it is not the compound arc itself.
+                        if (leastIDX != -1 && leastCost <= maxCost && posData.backPos[leastIDX] != backPos)
+                        {
+                            // We should have pruned the altToken from the graph:
+                            Debug.Assert(posData.backPos[leastIDX] != backPos);
+
+                            // Save the current compound token, to output when
+                            // this alternate path joins back:
+                            altToken = new Token(backID,
+                                                 fragment,
+                                                 backPos - lastBackTracePos,
+                                                 length,
+                                                 backType,
+                                                 backPos,
+                                                 GetDict(backType));
+
+                            // Redirect our backtrace to 2nd best:
+                            bestIDX = leastIDX;
+                            nextBestIDX = posData.backIndex[bestIDX];
+
+                            backPos = posData.backPos[bestIDX];
+                            length = pos - backPos;
+                            backType = posData.backType[bestIDX];
+                            backID = posData.backID[bestIDX];
+                            backCount = 0;
+                            //System.out.println("  do alt token!");
+
+                        }
+                        else
+                        {
+                            // I think in theory it's possible there is no
+                            // 2nd best path, which is fine; in this case we
+                            // only output the compound token:
+                            //System.out.println("  no alt token! bestIDX=" + bestIDX);
+                        }
+                    }
+                }
+
+                // Start of the current token relative to fragment.
+                int offset = backPos - lastBackTracePos;
+                Debug.Assert(offset >= 0);
+
+                if (altToken != null && altToken.Position >= backPos)
+                {
+
+                    // We've backtraced to the position where the
+                    // compound token starts; add it now:
+
+                    // The pruning we did when we created the altToken
+                    // ensures that the back trace will align back with
+                    // the start of the altToken:
+                    Debug.Assert(altToken.Position == backPos, altToken.Position + " vs " + backPos);
+
+                    // NOTE: not quite right: the compound token may
+                    // have had all punctuation back traced so far, but
+                    // then the decompounded token at this position is
+                    // not punctuation.  In this case backCount is 0,
+                    // but we should maybe add the altToken anyway...?
+
+                    if (backCount > 0)
+                    {
+                        // +1 accounts for the token at this position, which is
+                        // added to pending further below.
+                        backCount++;
+                        altToken.PositionLength = backCount;
+                        if (VERBOSE)
+                        {
+                            Console.WriteLine("    add altToken=" + altToken);
+                        }
+                        pending.Add(altToken);
+                    }
+                    else
+                    {
+                        // This means alt token was all punct tokens:
+                        if (VERBOSE)
+                        {
+                            Console.WriteLine("    discard all-punctuation altToken=" + altToken);
+                        }
+                        Debug.Assert(discardPunctuation);
+                    }
+                    altToken = null;
+                }
+
+                IDictionary dict = GetDict(backType);
+
+                if (backType == JapaneseTokenizerType.USER)
+                {
+
+                    // Expand the phraseID we recorded into the actual
+                    // segmentation:
+                    // Element 0 is the word id of the first segment; elements
+                    // 1..n are the segment lengths, in left-to-right order.
+                    int[] wordIDAndLength = userDictionary.LookupSegmentation(backID);
+                    int wordID = wordIDAndLength[0];
+                    int current = 0;
+                    for (int j = 1; j < wordIDAndLength.Length; j++)
+                    {
+                        int len = wordIDAndLength[j];
+                        //System.out.println("    add user: len=" + len);
+                        pending.Add(new Token(wordID + j - 1,
+                                              fragment,
+                                              current + offset,
+                                              len,
+                                              JapaneseTokenizerType.USER,
+                                              current + backPos,
+                                              dict));
+                        if (VERBOSE)
+                        {
+                            Console.WriteLine("    add USER token=" + pending[pending.Count - 1]);
+                        }
+                        current += len;
+                    }
+
+                    // Reverse the tokens we just added, because when we
+                    // serve them up from incrementToken we serve in
+                    // reverse:
+                    // NOTE(review): this only reorders pending if SubList returns a
+                    // live view over pending (as Java's List.subList does); if it
+                    // returns a copy the reversal is lost - confirm.
+                    Collections.Reverse(pending.SubList(pending.Count - (wordIDAndLength.Length - 1),
+                                                        pending.Count));
+
+                    backCount += wordIDAndLength.Length - 1;
+                }
+                else
+                {
+
+                    if (extendedMode && backType == JapaneseTokenizerType.UNKNOWN)
+                    {
+                        // In EXTENDED mode we convert unknown word into
+                        // unigrams:
+                        int unigramTokenCount = 0;
+                        // Walk the word right to left so the unigrams land in
+                        // pending in the same reversed order as other tokens.
+                        for (int i = length - 1; i >= 0; i--)
+                        {
+                            int charLen = 1;
+                            // A low surrogate that is not the first char is emitted
+                            // together with the char before it as one 2-char unigram.
+                            if (i > 0 && char.IsLowSurrogate(fragment[offset + i]))
+                            {
+                                i--;
+                                charLen = 2;
+                            }
+                            //System.out.println("    extended tok offset="
+                            //+ (offset + i));
+                            if (!discardPunctuation || !IsPunctuation(fragment[offset + i]))
+                            {
+                                pending.Add(new Token(CharacterDefinition.NGRAM,
+                                                      fragment,
+                                                      offset + i,
+                                                      charLen,
+                                                      JapaneseTokenizerType.UNKNOWN,
+                                                      backPos + i,
+                                                      unkDictionary));
+                                unigramTokenCount++;
+                            }
+                        }
+                        backCount += unigramTokenCount;
+
+                    }
+                    // The length == 0 test comes first so that fragment[offset]
+                    // is never read for an empty token.
+                    else if (!discardPunctuation || length == 0 || !IsPunctuation(fragment[offset]))
+                    {
+                        pending.Add(new Token(backID,
+                                              fragment,
+                                              offset,
+                                              length,
+                                              backType,
+                                              backPos,
+                                              dict));
+                        if (VERBOSE)
+                        {
+                            Console.WriteLine("    add token=" + pending[pending.Count - 1]);
+                        }
+                        backCount++;
+                    }
+                    else
+                    {
+                        if (VERBOSE)
+                        {
+                            Console.WriteLine("    skip punctuation token=" + new string(fragment, offset, length));
+                        }
+                    }
+                }
+
+                // Step to the start of the token just handled and remember its
+                // left id for the connection-cost lookups of the next iteration.
+                lastLeftWordID = dict.GetLeftId(backID);
+                pos = backPos;
+                bestIDX = nextBestIDX;
+            }
+
+            lastBackTracePos = endPos;
+
+            if (VERBOSE)
+            {
+                Console.WriteLine("  freeBefore pos=" + endPos);
+            }
+            // Notify the circular buffers that we are done with
+            // these positions:
+            buffer.FreeBefore(endPos);
+            positions.FreeBefore(endPos);
+        }
+
+        // Returns the dictionary that dictionaryMap holds for the given token
+        // type, or null when the map has no entry for it.
+        internal IDictionary GetDict(JapaneseTokenizerType type)
+        {
+            IDictionary found;
+            if (!dictionaryMap.TryGetValue(type, out found))
+            {
+                return null;
+            }
+            return found;
+        }
+
+        // Tells whether the Unicode category reported by Character.GetType for
+        // ch is one the tokenizer treats as punctuation: a separator, a control
+        // or format character, any punctuation category, or any symbol category.
+        private static bool IsPunctuation(char ch)
+        {
+            UnicodeCategory category = Character.GetType(ch);
+
+            // Separators, control characters and format characters.
+            if (category == UnicodeCategory.SpaceSeparator ||
+                category == UnicodeCategory.LineSeparator ||
+                category == UnicodeCategory.ParagraphSeparator ||
+                category == UnicodeCategory.Control ||
+                category == UnicodeCategory.Format)
+            {
+                return true;
+            }
+
+            // All punctuation categories.
+            if (category == UnicodeCategory.DashPunctuation ||
+                category == UnicodeCategory.OpenPunctuation ||
+                category == UnicodeCategory.ClosePunctuation ||
+                category == UnicodeCategory.ConnectorPunctuation ||
+                category == UnicodeCategory.OtherPunctuation ||
+                category == UnicodeCategory.InitialQuotePunctuation ||
+                category == UnicodeCategory.FinalQuotePunctuation)
+            {
+                return true;
+            }
+
+            // All symbol categories; every remaining category is not punctuation.
+            return category == UnicodeCategory.MathSymbol ||
+                   category == UnicodeCategory.CurrencySymbol ||
+                   category == UnicodeCategory.ModifierSymbol ||
+                   category == UnicodeCategory.OtherSymbol;
+        }
+    }
+
+    // LUCENENET specific - de-nested Mode and renamed JapaneseTokenizerMode
+
+    /// <summary>
+    /// Tokenization mode: this determines how the tokenizer handles
+    /// compound and unknown words.
+    /// </summary>
+    public enum JapaneseTokenizerMode
+    {
+        /// <summary>
+        /// Ordinary segmentation: no decomposition for compounds.
+        /// </summary>
+        NORMAL,
+
+        /// <summary>
+        /// Segmentation geared towards search: this includes a
+        /// decompounding process for long nouns, also including
+        /// the full compound token as a synonym.
+        /// </summary>
+        SEARCH,
+
+        /// <summary>
+        /// Extended mode outputs unigrams for unknown words.
+        /// <para/>
+        /// @lucene.experimental
+        /// </summary>
+        EXTENDED
+    }
+
+    // LUCENENET specific: de-nested Type and renamed JapaneseTokenizerType
+
+    /// <summary>
+    /// Token type reflecting the original source of this token
+    /// </summary>
+    public enum JapaneseTokenizerType
+    {
+        /// <summary>
+        /// Known words from the system dictionary.
+        /// </summary>
+        KNOWN,
+        /// <summary>
+        /// Unknown words (heuristically segmented).
+        /// When the tokenizer runs in extended mode, its backtrace step
+        /// emits tokens of this type as unigrams instead of one token.
+        /// </summary>
+        UNKNOWN,
+        /// <summary>
+        /// Known words from the user dictionary.
+        /// The tokenizer's backtrace step expands an entry of this type into
+        /// its individual segments, and never searches for a decompounded
+        /// alternative to it.
+        /// </summary>
+        USER
+    }
+
+
+    // LUCENENET specific - De-nested Position
+
+    // Holds all back pointers arriving to this position:
+    internal sealed class Position
+    {
+        // Initial slot count of every parallel array below.
+        private const int InitialCapacity = 8;
+
+        // Position number of this entry; assigned by WrappedPositionArray.Get
+        // whenever the instance is (re)used.
+        internal int pos;
+
+        // Number of arriving arcs stored in the back-pointer arrays.
+        internal int count;
+
+        // Back pointers, one slot per arriving arc (parallel arrays):
+        internal int[] costs = new int[InitialCapacity];
+        internal int[] lastRightID = new int[InitialCapacity];
+        internal int[] backPos = new int[InitialCapacity];
+        internal int[] backIndex = new int[InitialCapacity];
+        internal int[] backID = new int[InitialCapacity];
+        internal JapaneseTokenizerType[] backType = new JapaneseTokenizerType[InitialCapacity];
+
+        // Forward arcs, one slot per leaving arc (parallel arrays); filled and
+        // drained again while re-scoring underneath a too-long token:
+        internal int forwardCount;
+        internal int[] forwardPos = new int[InitialCapacity];
+        internal int[] forwardID = new int[InitialCapacity];
+        internal int[] forwardIndex = new int[InitialCapacity];
+        internal JapaneseTokenizerType[] forwardType = new JapaneseTokenizerType[InitialCapacity];
+
+        // Enlarges the back-pointer arrays so they can hold count + 1 arcs.
+        public void Grow()
+        {
+            int required = count + 1;
+            costs = ArrayUtil.Grow(costs, required);
+            lastRightID = ArrayUtil.Grow(lastRightID, required);
+            backPos = ArrayUtil.Grow(backPos, required);
+            backIndex = ArrayUtil.Grow(backIndex, required);
+            backID = ArrayUtil.Grow(backID, required);
+
+            // The enum array is sized by hand from backID so that it ends up
+            // exactly as long as the int arrays grown above.
+            JapaneseTokenizerType[] widened = new JapaneseTokenizerType[backID.Length];
+            backType.CopyTo(widened, 0);
+            backType = widened;
+        }
+
+        // Enlarges the forward-arc arrays so they can hold forwardCount + 1 arcs.
+        public void GrowForward()
+        {
+            int required = forwardCount + 1;
+            forwardPos = ArrayUtil.Grow(forwardPos, required);
+            forwardID = ArrayUtil.Grow(forwardID, required);
+            forwardIndex = ArrayUtil.Grow(forwardIndex, required);
+
+            // Same manual sizing as in Grow, taken from forwardPos here.
+            JapaneseTokenizerType[] widened = new JapaneseTokenizerType[forwardPos.Length];
+            forwardType.CopyTo(widened, 0);
+            forwardType = widened;
+        }
+
+        // Appends one arriving arc.  Arcs are always appended, never merged
+        // with an existing arc that carries the same lastRightID.
+        public void Add(int cost, int lastRightID, int backPos, int backIndex, int backID, JapaneseTokenizerType backType)
+        {
+            if (count == costs.Length)
+            {
+                Grow();
+            }
+
+            int slot = count;
+            this.costs[slot] = cost;
+            this.lastRightID[slot] = lastRightID;
+            this.backPos[slot] = backPos;
+            this.backIndex[slot] = backIndex;
+            this.backID[slot] = backID;
+            this.backType[slot] = backType;
+            count = slot + 1;
+        }
+
+        // Appends one leaving (forward) arc.
+        public void AddForward(int forwardPos, int forwardIndex, int forwardID, JapaneseTokenizerType forwardType)
+        {
+            if (forwardCount == this.forwardID.Length)
+            {
+                GrowForward();
+            }
+
+            int slot = forwardCount;
+            this.forwardPos[slot] = forwardPos;
+            this.forwardIndex[slot] = forwardIndex;
+            this.forwardID[slot] = forwardID;
+            this.forwardType[slot] = forwardType;
+            forwardCount = slot + 1;
+        }
+
+        // Forgets all arriving arcs.  forwardCount is not touched: it is
+        // expected to be 0 already, which the assertion checks.
+        public void Reset()
+        {
+            count = 0;
+            Debug.Assert(forwardCount == 0, "pos=" + pos + " forwardCount=" + forwardCount);
+        }
+    }
+
+
+    // LUCENENET specific - de-nested WrappedPositionArray
+
+    // TODO: make generic'd version of this "circular array"?
+    // It's a bit tricky because we do things to the Position
+    // (eg, set .pos = N on reuse)...
+    internal sealed class WrappedPositionArray
+    {
+        // Ring of reusable Position instances; every slot is always non-null.
+        private Position[] positions = new Position[8];
+
+        // Slot in positions that the next new position will occupy:
+        private int nextWrite;
+
+        // Position number that the next new position will receive:
+        private int nextPos;
+
+        // Number of slots currently holding a live position:
+        private int count;
+
+        public WrappedPositionArray()
+        {
+            int slot = 0;
+            while (slot < positions.Length)
+            {
+                positions[slot] = new Position();
+                slot++;
+            }
+        }
+
+        // Resets every live position, newest first, then rewinds the ring to
+        // its initial state.
+        public void Reset()
+        {
+            nextWrite--;
+            for (; count > 0; count--)
+            {
+                if (nextWrite == -1)
+                {
+                    // Stepped off the front of the array: wrap to the last slot.
+                    nextWrite = positions.Length - 1;
+                }
+                Position live = positions[nextWrite];
+                nextWrite--;
+                live.Reset();
+            }
+            nextWrite = 0;
+            nextPos = 0;
+            count = 0;
+        }
+
+        /// <summary>
+        /// Returns the Position for the absolute position <paramref name="pos"/>,
+        /// creating every position up to and including it when it has not been
+        /// handed out yet.  The position may lie arbitrarily far ahead, but must
+        /// not lie before the last FreeBefore.
+        /// </summary>
+        public Position Get(int pos)
+        {
+            while (pos >= nextPos)
+            {
+                //System.out.println("count=" + count + " vs len=" + positions.length);
+                if (count == positions.Length)
+                {
+                    Expand();
+                }
+                if (nextWrite == positions.Length)
+                {
+                    nextWrite = 0;
+                }
+
+                // The slot being claimed must already have been reset:
+                Debug.Assert(positions[nextWrite].count == 0);
+                Position claimed = positions[nextWrite];
+                nextWrite++;
+                claimed.pos = nextPos;
+                nextPos++;
+                count++;
+            }
+
+            Debug.Assert(InBounds(pos));
+            int slot = GetIndex(pos);
+            Debug.Assert(positions[slot].pos == pos);
+            return positions[slot];
+        }
+
+        // Swaps in a larger ring.  The live positions are copied so that the
+        // oldest one lands in slot 0, the extra slots receive fresh Position
+        // instances, and nextWrite points at the first extra slot.
+        private void Expand()
+        {
+            int oldLength = positions.Length;
+            int tailLength = oldLength - nextWrite;
+            Position[] bigger = new Position[ArrayUtil.Oversize(1 + count, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+            //System.out.println("grow positions " + newPositions.length);
+            System.Array.Copy(positions, nextWrite, bigger, 0, tailLength);
+            System.Array.Copy(positions, 0, bigger, tailLength, nextWrite);
+            for (int slot = oldLength; slot < bigger.Length; slot++)
+            {
+                bigger[slot] = new Position();
+            }
+            nextWrite = oldLength;
+            positions = bigger;
+        }
+
+        public int GetNextPos()
+        {
+            return nextPos;
+        }
+
+        // Assertion helper: is pos one of the currently live positions?
+        private bool InBounds(int pos)
+        {
+            int oldest = nextPos - count;
+            return pos < nextPos && oldest <= pos;
+        }
+
+        // Maps an absolute position to its slot in the ring.
+        private int GetIndex(int pos)
+        {
+            int slot = nextWrite - (nextPos - pos);
+            return slot < 0 ? slot + positions.Length : slot;
+        }
+
+        // Resets and releases every live position that lies before pos.
+        public void FreeBefore(int pos)
+        {
+            int toFree = count - (nextPos - pos);
+            Debug.Assert(toFree >= 0);
+            Debug.Assert(toFree <= count);
+
+            // Slot of the oldest live position:
+            int slot = nextWrite - count;
+            if (slot < 0)
+            {
+                slot += positions.Length;
+            }
+
+            for (int remaining = toFree; remaining > 0; remaining--)
+            {
+                if (slot == positions.Length)
+                {
+                    slot = 0;
+                }
+                //System.out.println("  fb idx=" + index);
+                positions[slot].Reset();
+                slot++;
+            }
+            count -= toFree;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizerFactory.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizerFactory.cs
new file mode 100644
index 0000000..5fe99d5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizerFactory.cs
@@ -0,0 +1,100 @@
+using Lucene.Net.Analysis.Ja.Dict;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="JapaneseTokenizer"/>.
+    /// <code>
+    /// &lt;fieldType name="text_ja" class="solr.TextField"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.JapaneseTokenizerFactory"
+    ///       mode="NORMAL"
+    ///       userDictionary="user.txt"
+    ///       userDictionaryEncoding="UTF-8"
+    ///       discardPunctuation="true"
+    ///     /&gt;
+    ///     &lt;filter class="solr.JapaneseBaseFormFilterFactory"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    public class JapaneseTokenizerFactory : TokenizerFactory, IResourceLoaderAware
+    {
+        private static readonly string MODE = "mode";
+        private static readonly string USER_DICT_PATH = "userDictionary";
+        private static readonly string USER_DICT_ENCODING = "userDictionaryEncoding";
+        private static readonly string DISCARD_PUNCTUATION = "discardPunctuation"; // Expert option
+
+        private UserDictionary userDictionary; // built by Inform(); stays null when no userDictionary argument is given
+
+        private readonly JapaneseTokenizerMode mode;
+        private readonly bool discardPunctuation;
+        private readonly string userDictionaryPath;
+        private readonly string userDictionaryEncoding;
+
+        /// <summary>Creates a new <see cref="JapaneseTokenizerFactory"/>.</summary>
+        /// <exception cref="ArgumentException">The mode is not a <see cref="JapaneseTokenizerMode"/> name, or unrecognized arguments remain.</exception>
+        public JapaneseTokenizerFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            // Enum.Parse (unlike an unchecked TryParse) rejects a misspelled mode instead of silently using default(JapaneseTokenizerMode).
+            mode = (JapaneseTokenizerMode)Enum.Parse(typeof(JapaneseTokenizerMode), Get(args, MODE, JapaneseTokenizer.DEFAULT_MODE.ToString()), true);
+            userDictionaryPath = Get(args, USER_DICT_PATH);
+            userDictionaryEncoding = Get(args, USER_DICT_ENCODING);
+            discardPunctuation = GetBoolean(args, DISCARD_PUNCTUATION, true);
+            if (args.Count > 0)
+            {
+                throw new ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+            }
+        }
+
+        /// <summary>Loads the user dictionary named by the <c>userDictionary</c> argument through <paramref name="loader"/>, or clears it when none was configured.</summary>
+        public virtual void Inform(IResourceLoader loader)
+        {
+            if (userDictionaryPath != null)
+            {
+                // UTF-8 is used when no encoding was configured; stream and reader are disposed once the dictionary has been built.
+                string encoding = userDictionaryEncoding ?? Encoding.UTF8.WebName;
+                Encoding decoder = Encoding.GetEncoding(encoding);
+                using (Stream stream = loader.OpenResource(userDictionaryPath))
+                using (TextReader reader = new StreamReader(stream, decoder))
+                {
+                    userDictionary = new UserDictionary(reader);
+                }
+            }
+            else
+            {
+                userDictionary = null;
+            }
+        }
+
+        /// <summary>Creates a <see cref="JapaneseTokenizer"/> over <paramref name="input"/> using the configured user dictionary, punctuation setting and mode.</summary>
+        public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
+        {
+            return new JapaneseTokenizer(factory, input, userDictionary, discardPunctuation, mode);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Lucene.Net.Analysis.Kuromoji.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Lucene.Net.Analysis.Kuromoji.csproj b/src/Lucene.Net.Analysis.Kuromoji/Lucene.Net.Analysis.Kuromoji.csproj
new file mode 100644
index 0000000..782aad3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Lucene.Net.Analysis.Kuromoji.csproj
@@ -0,0 +1,118 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{8408625A-2508-46D5-8519-045183C43724}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net.Analysis.Ja</RootNamespace>
+    <AssemblyName>Lucene.Net.Analysis.Kuromoji</AssemblyName>
+    <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="Microsoft.CSharp" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Dict\BinaryDictionary.cs" />
+    <Compile Include="Dict\CharacterDefinition.cs" />
+    <Compile Include="Dict\ConnectionCosts.cs" />
+    <Compile Include="Dict\Dictionary.cs" />
+    <Compile Include="Dict\TokenInfoDictionary.cs" />
+    <Compile Include="Dict\TokenInfoFST.cs" />
+    <Compile Include="Dict\UnknownDictionary.cs" />
+    <Compile Include="Dict\UserDictionary.cs" />
+    <Compile Include="GraphvizFormatter.cs" />
+    <Compile Include="JapaneseAnalyzer.cs" />
+    <Compile Include="JapaneseBaseFormFilter.cs" />
+    <Compile Include="JapaneseBaseFormFilterFactory.cs" />
+    <Compile Include="JapaneseIterationMarkCharFilter.cs" />
+    <Compile Include="JapaneseIterationMarkCharFilterFactory.cs" />
+    <Compile Include="JapaneseKatakanaStemFilter.cs" />
+    <Compile Include="JapaneseKatakanaStemFilterFactory.cs" />
+    <Compile Include="JapanesePartOfSpeechStopFilter.cs" />
+    <Compile Include="JapanesePartOfSpeechStopFilterFactory.cs" />
+    <Compile Include="JapaneseReadingFormFilter.cs" />
+    <Compile Include="JapaneseReadingFormFilterFactory.cs" />
+    <Compile Include="JapaneseTokenizer.cs" />
+    <Compile Include="JapaneseTokenizerFactory.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="Token.cs" />
+    <Compile Include="TokenAttributes\BaseFormAttribute.cs" />
+    <Compile Include="TokenAttributes\BaseFormAttributeImpl.cs" />
+    <Compile Include="TokenAttributes\InflectionAttribute.cs" />
+    <Compile Include="TokenAttributes\InflectionAttributeImpl.cs" />
+    <Compile Include="TokenAttributes\PartOfSpeechAttribute.cs" />
+    <Compile Include="TokenAttributes\PartOfSpeechAttributeImpl.cs" />
+    <Compile Include="TokenAttributes\ReadingAttribute.cs" />
+    <Compile Include="TokenAttributes\ReadingAttributeImpl.cs" />
+    <Compile Include="Tools\BinaryDictionaryWriter.cs" />
+    <Compile Include="Tools\CharacterDefinitionWriter.cs" />
+    <Compile Include="Tools\ConnectionCostsBuilder.cs" />
+    <Compile Include="Tools\ConnectionCostsWriter.cs" />
+    <Compile Include="Tools\DictionaryBuilder.cs" />
+    <Compile Include="Tools\TokenInfoDictionaryBuilder.cs" />
+    <Compile Include="Tools\TokenInfoDictionaryWriter.cs" />
+    <Compile Include="Tools\UnknownDictionaryBuilder.cs" />
+    <Compile Include="Tools\UnknownDictionaryWriter.cs" />
+    <Compile Include="Util\CSVUtil.cs" />
+    <Compile Include="Util\ToStringUtil.cs" />
+    <Compile Include="..\CommonAssemblyInfo.cs">
+      <Link>Properties\CommonAssemblyInfo.cs</Link>
+    </Compile>
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
+      <Project>{4ADD0BBC-B900-4715-9526-D871DE8EEA64}</Project>
+      <Name>Lucene.Net.Analysis.Common</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj">
+      <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <EmbeddedResource Include="Dict\CharacterDefinition.dat" />
+    <EmbeddedResource Include="Dict\ConnectionCosts.dat" />
+    <EmbeddedResource Include="Dict\TokenInfoDictionary%24buffer.dat" />
+    <EmbeddedResource Include="Dict\TokenInfoDictionary%24fst.dat" />
+    <EmbeddedResource Include="Dict\TokenInfoDictionary%24posDict.dat" />
+    <EmbeddedResource Include="Dict\TokenInfoDictionary%24targetMap.dat" />
+    <EmbeddedResource Include="Dict\UnknownDictionary%24buffer.dat" />
+    <EmbeddedResource Include="Dict\UnknownDictionary%24posDict.dat" />
+    <EmbeddedResource Include="Dict\UnknownDictionary%24targetMap.dat" />
+  </ItemGroup>
+  <ItemGroup>
+    <EmbeddedResource Include="stoptags.txt" />
+    <EmbeddedResource Include="stopwords.txt" />
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Lucene.Net.Analysis.Kuromoji.project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Lucene.Net.Analysis.Kuromoji.project.json b/src/Lucene.Net.Analysis.Kuromoji/Lucene.Net.Analysis.Kuromoji.project.json
new file mode 100644
index 0000000..86d1c12
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Lucene.Net.Analysis.Kuromoji.project.json
@@ -0,0 +1,8 @@
+{
+  "runtimes": {
+    "win": {}
+  },
+  "frameworks": {
+    "net451": {}
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Lucene.Net.Analysis.Kuromoji.xproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Lucene.Net.Analysis.Kuromoji.xproj b/src/Lucene.Net.Analysis.Kuromoji/Lucene.Net.Analysis.Kuromoji.xproj
new file mode 100644
index 0000000..59a3016
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Lucene.Net.Analysis.Kuromoji.xproj
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+<Project ToolsVersion="14.0.25420" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0.25420</VisualStudioVersion>
+    <VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
+  </PropertyGroup>
+  <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.Props" Condition="'$(VSToolsPath)' != ''" />
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>87e54ca7-7394-4705-a99a-0dd638265c56</ProjectGuid>
+    <RootNamespace>Lucene.Net.Analysis.Ja</RootNamespace>
+    <BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">.\obj</BaseIntermediateOutputPath>
+    <OutputPath Condition="'$(OutputPath)'=='' ">.\bin\</OutputPath>
+  </PropertyGroup>
+  <PropertyGroup>
+    <SchemaVersion>2.0</SchemaVersion>
+  </PropertyGroup>
+  <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.targets" Condition="'$(VSToolsPath)' != ''" />
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Properties/AssemblyInfo.cs b/src/Lucene.Net.Analysis.Kuromoji/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..2ea0e44
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Properties/AssemblyInfo.cs
@@ -0,0 +1,46 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Analysis.Kuromoji")]
+[assembly: AssemblyDescription("Japanese Morphological Analyzer " +
+    "for the Lucene.Net full-text search engine library from The Apache Software Foundation.")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyDefaultAlias("Lucene.Net.Analysis.Kuromoji")]
+[assembly: AssemblyCulture("")]
+
+[assembly: CLSCompliant(true)]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("8408625a-2508-46d5-8519-045183c43724")]
+
+// for testing
+[assembly: InternalsVisibleTo("Lucene.Net.Tests.Analysis.Kuromoji")]
+
+// NOTE: Version information is in CommonAssemblyInfo.cs

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Token.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Token.cs b/src/Lucene.Net.Analysis.Kuromoji/Token.cs
new file mode 100644
index 0000000..fd1afd2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Token.cs
@@ -0,0 +1,194 @@
+using Lucene.Net.Analysis.Ja.Dict;
+using Lucene.Net.Support;
+using System.Diagnostics.CodeAnalysis;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Analyzed token with morphological data from its dictionary.
+    /// </summary>
+    public class Token
+    {
+        private readonly IDictionary dictionary;
+
+        private readonly int wordId;
+
+        private readonly char[] surfaceForm;
+        private readonly int offset;
+        private readonly int length;
+
+        private readonly int position;
+        private int positionLength;
+
+        private readonly JapaneseTokenizerType type;
+
+        public Token(int wordId, char[] surfaceForm, int offset, int length, JapaneseTokenizerType type, int position, IDictionary dictionary)
+        {
+            this.wordId = wordId;
+            this.surfaceForm = surfaceForm;
+            this.offset = offset;
+            this.length = length;
+            this.type = type;
+            this.position = position;
+            this.dictionary = dictionary;
+        }
+
+        public override string ToString()
+        {
+            return "Token(\"" + new string(surfaceForm, offset, length) + "\" pos=" + position + " length=" + length +
+                " posLen=" + positionLength + " type=" + type + " wordId=" + wordId +
+                " leftID=" + dictionary.GetLeftId(wordId) + ")";
+        }
+
+        /// <summary>
+        /// Gets the character buffer that holds this token's surface form; the token's own characters start at <see cref="Offset"/> and span <see cref="Length"/> chars.
+        /// </summary>
+        [WritableArray]
+        [SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
+        public virtual char[] SurfaceForm
+        {
+            get { return surfaceForm; }
+        }
+
+        /// <summary>
+        /// Gets the offset into <see cref="SurfaceForm"/> at which this token's characters start.
+        /// </summary>
+        public virtual int Offset
+        {
+            get { return offset; }
+        }
+
+        /// <summary>
+        /// Gets the number of characters in <see cref="SurfaceForm"/> that belong to this token.
+        /// </summary>
+        public virtual int Length
+        {
+            get { return length; }
+        }
+
+        /// <summary>
+        /// Returns this token's characters (<see cref="Length"/> chars of <see cref="SurfaceForm"/> starting at <see cref="Offset"/>) as a string.
+        /// </summary>
+        /// <returns>The surface form as a <see cref="string"/>.</returns>
+        public virtual string GetSurfaceFormString()
+        {
+            return new string(surfaceForm, offset, length);
+        }
+
+        /// <summary>
+        /// Returns the reading that the dictionary reports for this token, or <c>null</c> if the token doesn't have a reading.
+        /// </summary>
+        /// <returns>The reading, or <c>null</c> if the token doesn't have a reading.</returns>
+        public virtual string GetReading()
+        {
+            return dictionary.GetReading(wordId, surfaceForm, offset, length);
+        }
+
+        /// <summary>
+        /// Returns the pronunciation that the dictionary reports for this token, or <c>null</c> if the token doesn't have a pronunciation.
+        /// </summary>
+        /// <returns>The pronunciation, or <c>null</c> if the token doesn't have a pronunciation.</returns>
+        public virtual string GetPronunciation()
+        {
+            return dictionary.GetPronunciation(wordId, surfaceForm, offset, length);
+        }
+
+        /// <summary>
+        /// Returns the part of speech that the dictionary reports for this token's word id.
+        /// </summary>
+        /// <returns>The part of speech.</returns>
+        public virtual string GetPartOfSpeech()
+        {
+            return dictionary.GetPartOfSpeech(wordId);
+        }
+
+        /// <summary>
+        /// Returns the inflection type that the dictionary reports for this token's word id, or <c>null</c>.
+        /// </summary>
+        /// <returns>The inflection type, or <c>null</c>.</returns>
+        public virtual string GetInflectionType()
+        {
+            return dictionary.GetInflectionType(wordId);
+        }
+
+        /// <summary>
+        /// Returns the inflection form that the dictionary reports for this token's word id, or <c>null</c>.
+        /// </summary>
+        /// <returns>The inflection form, or <c>null</c>.</returns>
+        public virtual string GetInflectionForm()
+        {
+            return dictionary.GetInflectionForm(wordId);
+        }
+
+        /// <summary>
+        /// Returns the base form that the dictionary reports for this token, or <c>null</c> if the token is not inflected.
+        /// </summary>
+        /// <returns>The base form, or <c>null</c> if the token is not inflected.</returns>
+        public virtual string GetBaseForm()
+        {
+            return dictionary.GetBaseForm(wordId, surfaceForm, offset, length);
+        }
+
+        /// <summary>
+        /// Returns <c>true</c> if this token is a known word, i.e. its type is <see cref="JapaneseTokenizerType.KNOWN"/>.
+        /// </summary>
+        /// <returns><c>true</c> if this token is in the standard dictionary; <c>false</c> if not.</returns>
+        public virtual bool IsKnown()
+        {
+            return type == JapaneseTokenizerType.KNOWN;
+        }
+
+        /// <summary>
+        /// Returns <c>true</c> if this token is an unknown word, i.e. its type is <see cref="JapaneseTokenizerType.UNKNOWN"/>.
+        /// </summary>
+        /// <returns><c>true</c> if this token is an unknown word; <c>false</c> if not.</returns>
+        public virtual bool IsUnknown()
+        {
+            return type == JapaneseTokenizerType.UNKNOWN;
+        }
+
+        /// <summary>
+        /// Returns <c>true</c> if this token is defined in the user dictionary, i.e. its type is <see cref="JapaneseTokenizerType.USER"/>.
+        /// </summary>
+        /// <returns><c>true</c> if this token is in the user dictionary; <c>false</c> if not.</returns>
+        public virtual bool IsUser()
+        {
+            return type == JapaneseTokenizerType.USER;
+        }
+
+        /// <summary>
+        /// Gets the position of this token in the input text (the value supplied to the constructor).
+        /// </summary>
+        public virtual int Position
+        {
+            get { return position; }
+        }
+
+        /// <summary>
+        /// Gets or sets the length (in tokens) of this token. For normal
+        /// tokens this is 1; for compound tokens it is greater than 1.
+        /// </summary>
+        public virtual int PositionLength
+        {
+            get { return positionLength; }
+            set { this.positionLength = value; }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/BaseFormAttribute.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/BaseFormAttribute.cs b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/BaseFormAttribute.cs
new file mode 100644
index 0000000..e3a06b3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/BaseFormAttribute.cs
@@ -0,0 +1,33 @@
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Ja.TokenAttributes
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Attribute for <see cref="Token.GetBaseForm()"/>
+    /// <para/>
+    /// Note: depending on part of speech, this value may not be applicable,
+    /// and will be <c>null</c>.
+    /// </summary>
+    public interface IBaseFormAttribute : IAttribute
+    {
+        string GetBaseForm();
+        void SetToken(Token token);
+    }
+}

[12/13] lucenenet git commit: lucene-cli: Added command for Kuromoji DictionaryBuilder tool + tests + documentation

Posted by ni...@apache.org.

lucene-cli: Added command for Kuromoji DictionaryBuilder tool + tests + documentation


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/bacfcc1a
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/bacfcc1a
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/bacfcc1a

Branch: refs/heads/master
Commit: bacfcc1adbe0fa46bbc5a3ba1d657258cb9c571d
Parents: 0f09201
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Mon Jul 24 00:13:23 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Mon Jul 24 00:35:28 2017 +0700

----------------------------------------------------------------------
 ...nalysisKuromojiBuildDictionaryCommandTest.cs | 104 +++++++++++++++++++
 .../lucene-cli/Resources/Strings.Designer.cs    |  54 ++++++++++
 src/tools/lucene-cli/Resources/Strings.resx     |  18 ++++
 .../commands/analysis/AnalysisCommand.cs        |   2 +-
 .../AnalysisKuromojiBuildDictionaryCommand.cs   |  95 +++++++++++++++++
 src/tools/lucene-cli/docs/analysis/index.md     |   1 +
 .../docs/analysis/kuromoji-build-dictionary.md  |  46 ++++++++
 src/tools/lucene-cli/project.json               |   3 +-
 8 files changed, 321 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bacfcc1a/src/tools/Lucene.Net.Tests.Cli/Commands/Analysis/AnalysisKuromojiBuildDictionaryCommandTest.cs
----------------------------------------------------------------------
diff --git a/src/tools/Lucene.Net.Tests.Cli/Commands/Analysis/AnalysisKuromojiBuildDictionaryCommandTest.cs b/src/tools/Lucene.Net.Tests.Cli/Commands/Analysis/AnalysisKuromojiBuildDictionaryCommandTest.cs
new file mode 100644
index 0000000..c8eaa41
--- /dev/null
+++ b/src/tools/Lucene.Net.Tests.Cli/Commands/Analysis/AnalysisKuromojiBuildDictionaryCommandTest.cs
@@ -0,0 +1,104 @@
+using Lucene.Net.Attributes;
+using Lucene.Net.Cli.CommandLine;
+using NUnit.Framework;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Cli.Commands
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class AnalysisKuromojiBuildDictionaryCommandTest : CommandTestCase
+    {
+        protected override ConfigurationBase CreateConfiguration(MockConsoleApp app)
+        {
+            return new AnalysisKuromojiBuildDictionaryCommand.Configuration(new CommandLineOptions()) { Main = (args) => app.Main(args) };
+        }
+
+        protected override IList<Arg[]> GetOptionalArgs()
+        {
+            // NOTE: We must order this in the sequence of the expected output.
+            return new List<Arg[]>()
+            {
+                new Arg[] { new Arg(inputPattern: "-e UTF-16|--encoding UTF-16", output: new string[] { "--encoding", "UTF-16" }) },
+                new Arg[] { new Arg(inputPattern: "-n|--normalize", output: new string[] { "true" }) },
+            };
+        }
+        protected override IList<Arg[]> GetRequiredArgs()
+        {
+            // NOTE: We must order this in the sequence of the expected output.
+            return new List<Arg[]>()
+            {
+                new Arg[] { new Arg(inputPattern: "epidic", output: new string[] { @"epidic" }) },
+                new Arg[] { new Arg(inputPattern: @"C:\lucene-input", output: new string[] { @"C:\lucene-input" }) },
+                new Arg[] { new Arg(inputPattern: @"C:\lucene-output", output: new string[] { @"C:\lucene-output" }) },
+            };
+        }
+
+        [Test]
+        [LuceneNetSpecific]
+        public override void TestAllValidCombinations()
+        {
+            var requiredArgs = GetRequiredArgs().ExpandArgs().RequiredParameters();
+            var optionalArgs = GetOptionalArgs().ExpandArgs().OptionalParameters();
+
+            foreach (var requiredArg in requiredArgs)
+            {
+                AssertCommandTranslation(
+                    string.Join(" ", requiredArg.Select(x => x.InputPattern).ToArray()),
+                    requiredArg.SelectMany(x => x.Output)
+                    
+                    .Concat(new string[] {
+                        // Special case: the encoding must always be supplied
+                        "utf-8",
+                        // Special case: normalize must always be supplied
+                        "false"
+                    }).ToArray());
+            }
+
+            foreach (var requiredArg in requiredArgs)
+            {
+                foreach (var optionalArg in optionalArgs)
+                {
+                    string command = string.Join(" ", requiredArg.Select(x => x.InputPattern).Union(optionalArg.Select(x => x.InputPattern).ToArray()));
+                    string[] expected = requiredArg.SelectMany(x => x.Output)
+                        // Special case: the encoding must always be supplied
+                        .Concat(Regex.IsMatch(command, "-e|--encoding") ? new string[] { "UTF-16" } : new string[] { "utf-8" })
+                        // Special case: normalize must always be supplied
+                        .Concat(Regex.IsMatch(command, "-n|--normalize") ? new string[] { "true" } : new string[] { "false" }).ToArray();
+                    AssertCommandTranslation(command, expected);
+                }
+            }
+        }
+
+        [Test]
+        [LuceneNetSpecific]
+        public virtual void TestNotEnoughArguments()
+        {
+            AssertConsoleOutput("one two", FromResource("NotEnoughArguments", 3));
+        }
+
+        [Test]
+        [LuceneNetSpecific]
+        public virtual void TestTooManyArguments()
+        {
+            Assert.Throws<CommandParsingException>(() => AssertConsoleOutput("one two three four", ""));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bacfcc1a/src/tools/lucene-cli/Resources/Strings.Designer.cs
----------------------------------------------------------------------
diff --git a/src/tools/lucene-cli/Resources/Strings.Designer.cs b/src/tools/lucene-cli/Resources/Strings.Designer.cs
index 5d1fa93..9af44ff 100644
--- a/src/tools/lucene-cli/Resources/Strings.Designer.cs
+++ b/src/tools/lucene-cli/Resources/Strings.Designer.cs
@@ -69,6 +69,60 @@ namespace Lucene.Net.Cli.Resources {
         }
         
         /// <summary>
+        ///    Looks up a localized string similar to Builds a custom dictionary that can be used by the JapaneseAnalyzer or JapaneseTokenizer..
+        /// </summary>
+        public static string AnalysisKuromojiBuildDictionaryCommandDescription {
+            get {
+                // Resource key looked up for the current resourceCulture.
+                const string resourceName = "AnalysisKuromojiBuildDictionaryCommandDescription";
+                return ResourceManager.GetString(resourceName, resourceCulture);
+            }
+        }
+        
+        /// <summary>
+        ///    Looks up a localized string similar to The dictionary format. Valid values are IPADIC and UNIDIC. If an invalid value is passed, IPADIC is assumed..
+        /// </summary>
+        public static string AnalysisKuromojiBuildDictionaryCommandFormatDescription {
+            get {
+                // Resource key looked up for the current resourceCulture.
+                const string resourceName = "AnalysisKuromojiBuildDictionaryCommandFormatDescription";
+                return ResourceManager.GetString(resourceName, resourceCulture);
+            }
+        }
+        
+        /// <summary>
+        ///    Looks up a localized string similar to The directory where the dictionary input files are located..
+        /// </summary>
+        public static string AnalysisKuromojiBuildDictionaryCommandInputDirectoryDescription {
+            get {
+                // Resource key looked up for the current resourceCulture.
+                const string resourceName = "AnalysisKuromojiBuildDictionaryCommandInputDirectoryDescription";
+                return ResourceManager.GetString(resourceName, resourceCulture);
+            }
+        }
+        
+        /// <summary>
+        ///    Looks up a localized string similar to The file encoding used by the input files. If not supplied, the default value is `UTF-8`..
+        /// </summary>
+        public static string AnalysisKuromojiBuildDictionaryCommandInputDirectoryEncodingDescription {
+            get {
+                // Resource key looked up for the current resourceCulture.
+                const string resourceName = "AnalysisKuromojiBuildDictionaryCommandInputDirectoryEncodingDescription";
+                return ResourceManager.GetString(resourceName, resourceCulture);
+            }
+        }
+        
+        /// <summary>
+        ///    Looks up a localized string similar to Normalize the entries using normalization form KC..
+        /// </summary>
+        public static string AnalysisKuromojiBuildDictionaryCommandNormalizeDescription {
+            get {
+                // Resource key looked up for the current resourceCulture.
+                const string resourceName = "AnalysisKuromojiBuildDictionaryCommandNormalizeDescription";
+                return ResourceManager.GetString(resourceName, resourceCulture);
+            }
+        }
+        
+        /// <summary>
+        ///    Looks up a localized string similar to The directory to put the dictionary output..
+        /// </summary>
+        public static string AnalysisKuromojiBuildDictionaryCommandOutputDirectoryDescription {
+            get {
+                // Resource key looked up for the current resourceCulture.
+                const string resourceName = "AnalysisKuromojiBuildDictionaryCommandOutputDirectoryDescription";
+                return ResourceManager.GetString(resourceName, resourceCulture);
+            }
+        }
+        
+        /// <summary>
         ///    Looks up a localized string similar to Compiles a stemmer table for the Egothor stemmer..
         /// </summary>
         public static string AnalysisStempelCompileStemsCommandDescription {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bacfcc1a/src/tools/lucene-cli/Resources/Strings.resx
----------------------------------------------------------------------
diff --git a/src/tools/lucene-cli/Resources/Strings.resx b/src/tools/lucene-cli/Resources/Strings.resx
index 64be738..727cb62 100644
--- a/src/tools/lucene-cli/Resources/Strings.resx
+++ b/src/tools/lucene-cli/Resources/Strings.resx
@@ -120,6 +120,24 @@
   <data name="AnalysisCommandDescription" xml:space="preserve">
     <value>Utilities to manage specialized analyzers.</value>
   </data>
+  <data name="AnalysisKuromojiBuildDictionaryCommandDescription" xml:space="preserve">
+    <value>Builds a custom dictionary that can be used by the JapaneseAnalyzer or JapaneseTokenizer.</value>
+  </data>
+  <data name="AnalysisKuromojiBuildDictionaryCommandFormatDescription" xml:space="preserve">
+    <value>The dictionary format. Valid values are IPADIC and UNIDIC. If an invalid value is passed, IPADIC is assumed.</value>
+  </data>
+  <data name="AnalysisKuromojiBuildDictionaryCommandInputDirectoryDescription" xml:space="preserve">
+    <value>The directory where the dictionary input files are located.</value>
+  </data>
+  <data name="AnalysisKuromojiBuildDictionaryCommandInputDirectoryEncodingDescription" xml:space="preserve">
+    <value>The file encoding used by the input files. If not supplied, the default value is `UTF-8`.</value>
+  </data>
+  <data name="AnalysisKuromojiBuildDictionaryCommandNormalizeDescription" xml:space="preserve">
+    <value>Normalize the entries using normalization form KC.</value>
+  </data>
+  <data name="AnalysisKuromojiBuildDictionaryCommandOutputDirectoryDescription" xml:space="preserve">
+    <value>The directory to put the dictionary output.</value>
+  </data>
   <data name="AnalysisStempelCompileStemsCommandDescription" xml:space="preserve">
     <value>Compiles a stemmer table for the Egothor stemmer.</value>
   </data>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bacfcc1a/src/tools/lucene-cli/commands/analysis/AnalysisCommand.cs
----------------------------------------------------------------------
diff --git a/src/tools/lucene-cli/commands/analysis/AnalysisCommand.cs b/src/tools/lucene-cli/commands/analysis/AnalysisCommand.cs
index 969bd58..a39eaeb 100644
--- a/src/tools/lucene-cli/commands/analysis/AnalysisCommand.cs
+++ b/src/tools/lucene-cli/commands/analysis/AnalysisCommand.cs
@@ -27,7 +27,7 @@
                 this.Description = FromResource("Description");
 
                 //this.Commands.Add(new AnalysisICUBuildRBBIRulesCommand.Configuration(options));
-                //this.Commands.Add(new AnalysisKuromojiBuildDictionaryCommand.Configuration(options));
+                this.Commands.Add(new AnalysisKuromojiBuildDictionaryCommand.Configuration(options));
                 this.Commands.Add(new AnalysisStempelCompileStemsCommand.Configuration(options));
                 this.Commands.Add(new AnalysisStempelPatchStemsCommand.Configuration(options));
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bacfcc1a/src/tools/lucene-cli/commands/analysis/analysis-kuromoji-build-dictionary/AnalysisKuromojiBuildDictionaryCommand.cs
----------------------------------------------------------------------
diff --git a/src/tools/lucene-cli/commands/analysis/analysis-kuromoji-build-dictionary/AnalysisKuromojiBuildDictionaryCommand.cs b/src/tools/lucene-cli/commands/analysis/analysis-kuromoji-build-dictionary/AnalysisKuromojiBuildDictionaryCommand.cs
new file mode 100644
index 0000000..7f10ed7
--- /dev/null
+++ b/src/tools/lucene-cli/commands/analysis/analysis-kuromoji-build-dictionary/AnalysisKuromojiBuildDictionaryCommand.cs
@@ -0,0 +1,95 @@
+using Lucene.Net.Analysis.Ja.Util;
+using Lucene.Net.Cli.CommandLine;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Cli
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Command-line command <c>kuromoji-build-dictionary</c>. It gathers the
+    /// positional arguments and options declared by <see cref="Configuration"/>
+    /// and forwards them, as a flat argument list, to <see cref="DictionaryBuilder"/>.
+    /// </summary>
+    public class AnalysisKuromojiBuildDictionaryCommand : ICommand
+    {
+        /// <summary>
+        /// Declares the name, description, arguments and options of the command
+        /// and routes execution to <see cref="AnalysisKuromojiBuildDictionaryCommand.Run(ConfigurationBase)"/>.
+        /// </summary>
+        public class Configuration : ConfigurationBase
+        {
+            /// <summary>
+            /// Registers the three positional arguments and the two options of the command.
+            /// </summary>
+            /// <param name="options">Shared command-line options (not read by this constructor).</param>
+            public Configuration(CommandLineOptions options)
+            {
+                this.Main = (args) => DictionaryBuilder.Main(args);
+
+                this.Name = "kuromoji-build-dictionary";
+                this.Description = FromResource("Description");
+
+                this.Format = this.Argument(
+                    "<FORMAT>",
+                    FromResource("FormatDescription"));
+                this.InputDirectory = this.Argument(
+                    "<INPUT_DIRECTORY>",
+                    FromResource("InputDirectoryDescription"));
+                this.OutputDirectory = this.Argument(
+                    "<OUTPUT_DIRECTORY>",
+                    FromResource("OutputDirectoryDescription"));
+                this.InputDirectoryEncoding = this.Option(
+                    "-e|--encoding <ENCODING>",
+                    FromResource("InputDirectoryEncodingDescription"),
+                    CommandOptionType.SingleValue);
+                this.Normalize = this.Option(
+                    "-n|--normalize",
+                    FromResource("NormalizeDescription"),
+                    CommandOptionType.NoValue);
+
+                this.OnExecute(() => new AnalysisKuromojiBuildDictionaryCommand().Run(this));
+            }
+
+            /// <summary>The <c>&lt;FORMAT&gt;</c> positional argument.</summary>
+            public virtual CommandArgument Format { get; private set; }
+            /// <summary>The <c>&lt;INPUT_DIRECTORY&gt;</c> positional argument.</summary>
+            public virtual CommandArgument InputDirectory { get; private set; }
+            /// <summary>The <c>&lt;OUTPUT_DIRECTORY&gt;</c> positional argument.</summary>
+            public virtual CommandArgument OutputDirectory { get; private set; }
+            /// <summary>The <c>-e|--encoding</c> single-value option.</summary>
+            public virtual CommandOption InputDirectoryEncoding { get; private set; }
+            /// <summary>The <c>-n|--normalize</c> switch (takes no value).</summary>
+            public virtual CommandOption Normalize { get; private set; }
+        }
+
+        /// <summary>
+        /// Validates the arguments and invokes <c>cmd.Main</c> with the positional
+        /// arguments followed by the encoding and the normalize flag.
+        /// </summary>
+        /// <param name="cmd">The configuration to run; must be a <see cref="Configuration"/>.</param>
+        /// <returns><c>1</c> if <c>cmd.ValidateArguments(3)</c> fails; otherwise <c>0</c>.</returns>
+        /// <exception cref="System.InvalidCastException"><paramref name="cmd"/> is not a <see cref="Configuration"/>.</exception>
+        public int Run(ConfigurationBase cmd)
+        {
+            if (!cmd.ValidateArguments(3))
+            {
+                return 1;
+            }
+
+            // Direct cast instead of 'as': a wrong configuration type fails here with an
+            // InvalidCastException rather than as a NullReferenceException further down.
+            var input = (Configuration)cmd;
+            var args = new List<string>(input.GetNonNullArguments());
+
+            // The encoding and the normalize flag are always appended, in this order,
+            // after the positional arguments; defaults are used when the options are absent.
+            args.Add(input.InputDirectoryEncoding.HasValue()
+                ? input.InputDirectoryEncoding.Value()
+                : "utf-8");
+            args.Add(input.Normalize.HasValue() ? "true" : "false");
+
+            cmd.Main(args.ToArray());
+            return 0;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bacfcc1a/src/tools/lucene-cli/docs/analysis/index.md
----------------------------------------------------------------------
diff --git a/src/tools/lucene-cli/docs/analysis/index.md b/src/tools/lucene-cli/docs/analysis/index.md
index c114294..9843805 100644
--- a/src/tools/lucene-cli/docs/analysis/index.md
+++ b/src/tools/lucene-cli/docs/analysis/index.md
@@ -6,5 +6,6 @@ Utilities to manage specialized analyzers.
 
 ## Commands
 
+- [kuromoji-build-dictionary](kuromoji-build-dictionary.md)
 - [stempel-compile-stems](stempel-compile-stems.md)
 - [stempel-patch-stems](stempel-patch-stems.md)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bacfcc1a/src/tools/lucene-cli/docs/analysis/kuromoji-build-dictionary.md
----------------------------------------------------------------------
diff --git a/src/tools/lucene-cli/docs/analysis/kuromoji-build-dictionary.md b/src/tools/lucene-cli/docs/analysis/kuromoji-build-dictionary.md
new file mode 100644
index 0000000..9fd7cf6
--- /dev/null
+++ b/src/tools/lucene-cli/docs/analysis/kuromoji-build-dictionary.md
@@ -0,0 +1,46 @@
+# kuromoji-build-dictionary
+
+### Name
+
+`analysis-kuromoji-build-dictionary` - Generates a dictionary file for the JapaneseAnalyzer or JapaneseTokenizer in the Lucene.Net.Analysis.Kuromoji project.
+
+### Synopsis
+
+<code>dotnet lucene-cli.dll analysis kuromoji-build-dictionary <FORMAT> <INPUT_DIRECTORY> <OUTPUT_DIRECTORY> [-e|--encoding <ENCODING>] [-n|--normalize] [?|-h|--help]</code>
+
+### Description
+
+See the [Kuromoji project documentation](https://github.com/atilika/kuromoji) for more information.
+
+### Arguments
+
+`FORMAT`
+
+The dictionary format. Valid values are IPADIC and UNIDIC. If an invalid value is passed, IPADIC is assumed.
+
+`INPUT_DIRECTORY`
+
+The directory where the dictionary input files are located.
+
+`OUTPUT_DIRECTORY`
+
+The directory to put the dictionary output.
+
+### Options
+
+`?|-h|--help`
+
+Prints out a short help for the command.
+
+`-e|--encoding <ENCODING>`
+
+The file encoding used by the input files. If not supplied, the default value is `UTF-8`.
+
+`-n|--normalize`
+
+Normalize the entries using normalization form KC.
+
+### Example
+
+<code>dotnet lucene-cli.dll analysis kuromoji-build-dictionary IPADIC X:\kuromoji-data X:\kuromoji-dictionary --encoding UTF-16</code>
+

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bacfcc1a/src/tools/lucene-cli/project.json
----------------------------------------------------------------------
diff --git a/src/tools/lucene-cli/project.json b/src/tools/lucene-cli/project.json
index 219964d..767a705 100644
--- a/src/tools/lucene-cli/project.json
+++ b/src/tools/lucene-cli/project.json
@@ -1,4 +1,4 @@
-{
+{
   "version": "4.8.0",
   "entryPoint": "Program",
   "buildOptions": {
@@ -19,6 +19,7 @@
   "dependencies": {
     "Lucene.Net": "4.8.0",
     "Lucene.Net.Analysis.Common": "4.8.0",
+    "Lucene.Net.Analysis.Kuromoji": "4.8.0",
     "Lucene.Net.Analysis.Stempel": "4.8.0",
     "Lucene.Net.Demo": "4.8.0",
     "Lucene.Net.Expressions": "4.8.0",

[03/13] lucenenet git commit: Ported Lucene.Net.Analysis.Kuromoji + tests

Posted by ni...@apache.org.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/Tools/UnknownDictionaryTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/Tools/UnknownDictionaryTest.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/Tools/UnknownDictionaryTest.cs
new file mode 100644
index 0000000..a4940f5
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/Tools/UnknownDictionaryTest.cs
@@ -0,0 +1,93 @@
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests for <see cref="UnknownDictionaryWriter"/>: character category
+    /// registration and entry insertion.
+    /// </summary>
+    public class UnknownDictionaryTest : LuceneTestCase
+    {
+        public static readonly string FILENAME = "unk-tokeninfo-dict.obj";
+
+        /// <summary>
+        /// Invalid category names and indexes must be rejected; valid ones must be accepted.
+        /// </summary>
+        [Test]
+        public void TestPutCharacterCategory()
+        {
+            UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);
+
+            // Assert.Catch replaces the former try { ...; fail(); } catch (Exception) { }
+            // pattern: whatever fail() throws is itself an Exception, so that catch block
+            // swallowed it and these negative checks could never fail.
+            Assert.Catch(() => unkDic.PutCharacterCategory(0, "DUMMY_NAME"));
+            Assert.Catch(() => unkDic.PutCharacterCategory(-1, "KATAKANA"));
+
+            unkDic.PutCharacterCategory(0, "DEFAULT");
+            unkDic.PutCharacterCategory(1, "GREEK");
+            unkDic.PutCharacterCategory(2, "HIRAGANA");
+            unkDic.PutCharacterCategory(3, "KATAKANA");
+            unkDic.PutCharacterCategory(4, "KANJI");
+        }
+
+        /// <summary>
+        /// A malformed entry must be rejected; well-formed entries must be accepted
+        /// once their categories are registered.
+        /// </summary>
+        [Test]
+        public void TestPut()
+        {
+            UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);
+
+            // This entry has one fewer numeric column than the valid entries below.
+            // Assert.Catch is used for the same reason as in TestPutCharacterCategory.
+            Assert.Catch(() => unkDic.Put(CSVUtil.Parse("KANJI,1285,11426,名詞,一般,*,*,*,*,*,*,*")));
+
+            String entry1 = "ALPHA,1285,1285,13398,名詞,一般,*,*,*,*,*,*,*";
+            String entry2 = "HIRAGANA,1285,1285,13069,名詞,一般,*,*,*,*,*,*,*";
+            String entry3 = "KANJI,1285,1285,11426,名詞,一般,*,*,*,*,*,*,*";
+
+            unkDic.PutCharacterCategory(0, "ALPHA");
+            unkDic.PutCharacterCategory(1, "HIRAGANA");
+            unkDic.PutCharacterCategory(2, "KANJI");
+
+            unkDic.Put(CSVUtil.Parse(entry1));
+            unkDic.Put(CSVUtil.Parse(entry2));
+            unkDic.Put(CSVUtil.Parse(entry3));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/Util/TestToStringUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/Util/TestToStringUtil.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/Util/TestToStringUtil.cs
new file mode 100644
index 0000000..2922b27
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/Util/TestToStringUtil.cs
@@ -0,0 +1,121 @@
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests for <see cref="ToStringUtil"/>: part-of-speech translation and
+    /// katakana romanization.
+    /// </summary>
+    public class TestToStringUtil : LuceneTestCase
+    {
+        /// <summary>Checks the translation of a single part-of-speech tag.</summary>
+        [Test]
+        public void TestPOS()
+        {
+            var translated = ToStringUtil.GetPOSTranslation("名詞-接尾-サ変接続");
+            assertEquals("noun-suffix-verbal", translated);
+        }
+
+        /// <summary>Checks romanization of a few complete words.</summary>
+        [Test]
+        public void TestHepburn()
+        {
+            // Each row: expected romanization first, katakana input second.
+            string[][] cases =
+            {
+                new[] { "majan", "マージャン" },
+                new[] { "uroncha", "ウーロンチャ" },
+                new[] { "chahan", "チャーハン" },
+                new[] { "chashu", "チャーシュー" },
+                new[] { "shumai", "シューマイ" },
+            };
+
+            foreach (string[] row in cases)
+            {
+                assertEquals(row[0], ToStringUtil.GetRomanization(row[1]));
+            }
+        }
+
+        // see http://en.wikipedia.org/wiki/Hepburn_romanization,
+        // but this isnt even thorough or really probably what we want!
+        /// <summary>Checks romanization of individual kana against a lookup table.</summary>
+        [Test]
+        public void TestHepburnTable()
+        {
+            // Katakana input mapped to its expected romanization.
+            IDictionary<String, String> table = new Dictionary<String, String>() {
+                { "ア", "a" }, { "イ", "i" }, { "ウ", "u" }, { "エ", "e" }, { "オ", "o" },
+                { "カ", "ka" }, { "キ", "ki" }, { "ク", "ku" }, { "ケ", "ke" }, { "コ", "ko" },
+                { "サ", "sa" }, { "シ", "shi" }, { "ス", "su" }, { "セ", "se" }, { "ソ", "so" },
+                { "タ", "ta" }, { "チ", "chi" }, { "ツ", "tsu" }, { "テ", "te" }, { "ト", "to" },
+                { "ナ", "na" }, { "ニ", "ni" }, { "ヌ", "nu" }, { "ネ", "ne" }, { "ノ", "no" },
+                { "ハ", "ha" }, { "ヒ", "hi" }, { "フ", "fu" }, { "ヘ", "he" }, { "ホ", "ho" },
+                { "マ", "ma" }, { "ミ", "mi" }, { "ム", "mu" }, { "メ", "me" }, { "モ", "mo" },
+                { "ヤ", "ya" }, { "ユ", "yu" }, { "ヨ", "yo" },
+                { "ラ", "ra" }, { "リ", "ri" }, { "ル", "ru" }, { "レ", "re" }, { "ロ", "ro" },
+                { "ワ", "wa" }, { "ヰ", "i" }, { "ヱ", "e" }, { "ヲ", "o" },
+                { "ン", "n" },
+                { "ガ", "ga" }, { "ギ", "gi" }, { "グ", "gu" }, { "ゲ", "ge" }, { "ゴ", "go" },
+                { "ザ", "za" }, { "ジ", "ji" }, { "ズ", "zu" }, { "ゼ", "ze" }, { "ゾ", "zo" },
+                { "ダ", "da" }, { "ヂ", "ji" }, { "ヅ", "zu" }, { "デ", "de" }, { "ド", "do" },
+                { "バ", "ba" }, { "ビ", "bi" }, { "ブ", "bu" }, { "ベ", "be" }, { "ボ", "bo" },
+                { "パ", "pa" }, { "ピ", "pi" }, { "プ", "pu" }, { "ペ", "pe" }, { "ポ", "po" },
+
+                { "キャ", "kya" }, { "キュ", "kyu" }, { "キョ", "kyo" },
+                { "シャ", "sha" }, { "シュ", "shu" }, { "ショ", "sho" },
+                { "チャ", "cha" }, { "チュ", "chu" }, { "チョ", "cho" },
+                { "ニャ", "nya" }, { "ニュ", "nyu" }, { "ニョ", "nyo" },
+                { "ヒャ", "hya" }, { "ヒュ", "hyu" }, { "ヒョ", "hyo" },
+                { "ミャ", "mya" }, { "ミュ", "myu" }, { "ミョ", "myo" },
+                { "リャ", "rya" }, { "リュ", "ryu" }, { "リョ", "ryo" },
+                { "ギャ", "gya" }, { "ギュ", "gyu" }, { "ギョ", "gyo" },
+                { "ジャ", "ja" }, { "ジュ", "ju" }, { "ジョ", "jo" },
+                { "ヂャ", "ja" }, { "ヂュ", "ju" }, { "ヂョ", "jo" },
+                { "ビャ", "bya" }, { "ビュ", "byu" }, { "ビョ", "byo" },
+                { "ピャ", "pya" }, { "ピュ", "pyu" }, { "ピョ", "pyo" },
+
+                { "イィ", "yi" }, { "イェ", "ye" },
+                { "ウァ", "wa" }, { "ウィ", "wi" }, { "ウゥ", "wu" }, { "ウェ", "we" }, { "ウォ", "wo" },
+                { "ウュ", "wyu" },
+                // TODO: really should be vu
+                { "ヴァ", "va" }, { "ヴィ", "vi" }, { "ヴ", "v" }, { "ヴェ", "ve" }, { "ヴォ", "vo" },
+                { "ヴャ", "vya" }, { "ヴュ", "vyu" }, { "ヴィェ", "vye" }, { "ヴョ", "vyo" },
+                { "キェ", "kye" },
+                { "ギェ", "gye" },
+                { "クァ", "kwa" }, { "クィ", "kwi" }, { "クェ", "kwe" }, { "クォ", "kwo" },
+                { "クヮ", "kwa" },
+                { "グァ", "gwa" }, { "グィ", "gwi" }, { "グェ", "gwe" }, { "グォ", "gwo" },
+                { "グヮ", "gwa" },
+                { "シェ", "she" },
+                { "ジェ", "je" },
+                { "スィ", "si" },
+                { "ズィ", "zi" },
+                { "チェ", "che" },
+                { "ツァ", "tsa" }, { "ツィ", "tsi" }, { "ツェ", "tse" }, { "ツォ", "tso" },
+                { "ツュ", "tsyu" },
+                { "ティ", "ti" }, { "トゥ", "tu" },
+                { "テュ", "tyu" },
+                { "ディ", "di" }, { "ドゥ", "du" },
+                { "デュ", "dyu" },
+                { "ニェ", "nye" },
+                { "ヒェ", "hye" },
+                { "ビェ", "bye" },
+                { "ピェ", "pye" },
+                { "ファ", "fa" }, { "フィ", "fi" }, { "フェ", "fe" }, { "フォ", "fo" },
+                { "フャ", "fya" }, { "フュ", "fyu" }, { "フィェ", "fye" }, { "フョ", "fyo" },
+                { "ホゥ", "hu" },
+                { "ミェ", "mye" },
+                { "リェ", "rye" },
+                { "ラ゜", "la" }, { "リ゜", "li" }, { "ル゜", "lu" }, { "レ゜", "le" }, { "ロ゜", "lo" },
+                { "ヷ", "va" }, { "ヸ", "vi" }, { "ヹ", "ve" }, { "ヺ", "vo" },
+            };
+
+            // The kana itself is passed as the assertion message so a failure names the input.
+            foreach (KeyValuePair<String, String> entry in table)
+            {
+                assertEquals(entry.Key, entry.Value, ToStringUtil.GetRomanization(entry.Key));
+            }
+        }
+    }
+}

[11/13] lucenenet git commit: Ported Lucene.Net.Analysis.Kuromoji + tests

Posted by ni...@apache.org.

Ported Lucene.Net.Analysis.Kuromoji + tests


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/0f092010
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/0f092010
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/0f092010

Branch: refs/heads/master
Commit: 0f092010450cec325f541c5d6e404fd5f3b77a83
Parents: e67244a
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Jul 23 19:39:51 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Mon Jul 24 00:35:27 2017 +0700

----------------------------------------------------------------------
 CONTRIBUTING.md                                 |    6 +-
 Lucene.Net.Portable.sln                         |   20 +
 Lucene.Net.sln                                  |   52 +
 README.md                                       |    9 +-
 .../Dict/BinaryDictionary.cs                    |  330 ++++
 .../Dict/CharacterDefinition.cs                 |  124 ++
 .../Dict/CharacterDefinition.dat                |  Bin 0 -> 65568 bytes
 .../Dict/ConnectionCosts.cs                     |   90 ++
 .../Dict/ConnectionCosts.dat                    |  Bin 0 -> 2624540 bytes
 .../Dict/Dictionary.cs                          |  106 ++
 .../Dict/TokenInfoDictionary$buffer.dat         |  Bin 0 -> 4337216 bytes
 .../Dict/TokenInfoDictionary$fst.dat            |  Bin 0 -> 1716198 bytes
 .../Dict/TokenInfoDictionary$posDict.dat        |  Bin 0 -> 54870 bytes
 .../Dict/TokenInfoDictionary$targetMap.dat      |  Bin 0 -> 392165 bytes
 .../Dict/TokenInfoDictionary.cs                 |   72 +
 .../Dict/TokenInfoFST.cs                        |  118 ++
 .../Dict/UnknownDictionary$buffer.dat           |  Bin 0 -> 311 bytes
 .../Dict/UnknownDictionary$posDict.dat          |  Bin 0 -> 4111 bytes
 .../Dict/UnknownDictionary$targetMap.dat        |  Bin 0 -> 69 bytes
 .../Dict/UnknownDictionary.cs                   |  100 ++
 .../Dict/UserDictionary.cs                      |  300 ++++
 .../GraphvizFormatter.cs                        |  197 +++
 .../JapaneseAnalyzer.cs                         |  103 ++
 .../JapaneseBaseFormFilter.cs                   |   65 +
 .../JapaneseBaseFormFilterFactory.cs            |   52 +
 .../JapaneseIterationMarkCharFilter.cs          |  500 ++++++
 .../JapaneseIterationMarkCharFilterFactory.cs   |   66 +
 .../JapaneseKatakanaStemFilter.cs               |  111 ++
 .../JapaneseKatakanaStemFilterFactory.cs        |   61 +
 .../JapanesePartOfSpeechStopFilter.cs           |   61 +
 .../JapanesePartOfSpeechStopFilterFactory.cs    |   85 +
 .../JapaneseReadingFormFilter.cs                |   89 ++
 .../JapaneseReadingFormFilterFactory.cs         |   57 +
 .../JapaneseTokenizer.cs                        | 1489 ++++++++++++++++++
 .../JapaneseTokenizerFactory.cs                 |  100 ++
 .../Lucene.Net.Analysis.Kuromoji.csproj         |  118 ++
 .../Lucene.Net.Analysis.Kuromoji.project.json   |    8 +
 .../Lucene.Net.Analysis.Kuromoji.xproj          |   38 +
 .../Properties/AssemblyInfo.cs                  |   46 +
 src/Lucene.Net.Analysis.Kuromoji/Token.cs       |  194 +++
 .../TokenAttributes/BaseFormAttribute.cs        |   33 +
 .../TokenAttributes/BaseFormAttributeImpl.cs    |   55 +
 .../TokenAttributes/InflectionAttribute.cs      |   34 +
 .../TokenAttributes/InflectionAttributeImpl.cs  |   68 +
 .../TokenAttributes/PartOfSpeechAttribute.cs    |   30 +
 .../PartOfSpeechAttributeImpl.cs                |   59 +
 .../TokenAttributes/ReadingAttribute.cs         |   34 +
 .../TokenAttributes/ReadingAttributeImpl.cs     |   68 +
 .../Tools/BinaryDictionaryWriter.cs             |  370 +++++
 .../Tools/CharacterDefinitionWriter.cs          |   91 ++
 .../Tools/ConnectionCostsBuilder.cs             |   68 +
 .../Tools/ConnectionCostsWriter.cs              |   74 +
 .../Tools/DictionaryBuilder.cs                  |   92 ++
 .../Tools/TokenInfoDictionaryBuilder.cs         |  230 +++
 .../Tools/TokenInfoDictionaryWriter.cs          |   51 +
 .../Tools/UnknownDictionaryBuilder.cs           |  146 ++
 .../Tools/UnknownDictionaryWriter.cs            |   66 +
 .../Util/CSVUtil.cs                             |  124 ++
 .../Util/ToStringUtil.cs                        | 1401 ++++++++++++++++
 src/Lucene.Net.Analysis.Kuromoji/project.json   |   60 +
 src/Lucene.Net.Analysis.Kuromoji/stoptags.txt   |  420 +++++
 src/Lucene.Net.Analysis.Kuromoji/stopwords.txt  |  127 ++
 .../Dict/TestTokenInfoDictionary.cs             |  114 ++
 .../Dict/UserDictionaryTest.cs                  |   90 ++
 .../Lucene.Net.Tests.Analysis.Kuromoji.csproj   |  106 ++
 ...ene.Net.Tests.Analysis.Kuromoji.project.json |   11 +
 .../Lucene.Net.Tests.Analysis.Kuromoji.xproj    |   41 +
 .../Properties/AssemblyInfo.cs                  |   38 +
 .../StringMockResourceLoader.cs                 |   67 +
 .../Support/TestApiConsistency.cs               |  150 ++
 .../Support/TestExceptionSerialization.cs       |   54 +
 .../TestExtendedMode.cs                         |   82 +
 .../TestJapaneseAnalyzer.cs                     |  229 +++
 .../TestJapaneseBaseFormFilter.cs               |   84 +
 .../TestJapaneseBaseFormFilterFactory.cs        |   60 +
 .../TestJapaneseIterationMarkCharFilter.cs      |  241 +++
 ...estJapaneseIterationMarkCharFilterFactory.cs |  108 ++
 .../TestJapaneseKatakanaStemFilter.cs           |  100 ++
 .../TestJapaneseKatakanaStemFilterFactory.cs    |   62 +
 ...TestJapanesePartOfSpeechStopFilterFactory.cs |   70 +
 .../TestJapaneseReadingFormFilter.cs            |  109 ++
 .../TestJapaneseReadingFormFilterFactory.cs     |   59 +
 .../TestJapaneseTokenizer.cs                    |  846 ++++++++++
 .../TestJapaneseTokenizerFactory.cs             |  134 ++
 .../TestSearchMode.cs                           |   92 ++
 .../Tools/UnknownDictionaryTest.cs              |   93 ++
 .../Util/TestToStringUtil.cs                    |  121 ++
 .../bocchan.utf-8                               |    1 +
 .../project.json                                |   43 +
 .../search-segmentation-tests.txt               |  142 ++
 .../userdict.txt                                |   10 +
 src/Lucene.Net/Support/Collections.cs           |    9 +
 92 files changed, 11827 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/CONTRIBUTING.md
----------------------------------------------------------------------
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 226e681..fa2942c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -36,11 +36,7 @@ helpers to help with that, see for examples see our [Java style methods to avoid
 
 Note that even though we are currently a port of Lucene 4.8.0, we recommend porting over new work from 4.8.1. We hope to begin the work of upgrading to 4.8.1 soon (let us know if interested). There are only about 100 files that changed between 4.8.0 and 4.8.1.
 
-### Pending being ported from scratch (code + tests)
-
-* [Lucene.Net.Analysis.Kuromoji](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.1/lucene/analysis/kuromoji) - See [JIRA issue 567](https://issues.apache.org/jira/browse/LUCENENET-567)
-
-### Pending being ported from scratch (code + tests), but have additional dependencies that also either need to be sourced from the .NET ecosystem or ported.
+### Pending being ported from scratch (code + tests) plus have additional dependencies that either need to be sourced from the .NET ecosystem or ported.
 
 * [Lucene.Net.Benchmark](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.1/lucene/benchmark) - See [JIRA issue 564](https://issues.apache.org/jira/browse/LUCENENET-564)
 * [Lucene.Net.Analysis.Morfologik](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.1/lucene/analysis/morfologik) - See [JIRA issue 568](https://issues.apache.org/jira/browse/LUCENENET-568)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/Lucene.Net.Portable.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.Portable.sln b/Lucene.Net.Portable.sln
index bac9168..e94a262 100644
--- a/Lucene.Net.Portable.sln
+++ b/Lucene.Net.Portable.sln
@@ -103,6 +103,10 @@ Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "lucene-cli", "src\tools\luc
 EndProject
 Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Tests.Cli", "src\tools\Lucene.Net.Tests.Cli\Lucene.Net.Tests.Cli.xproj", "{495B65F0-0B01-40FE-9DC8-5A82C49E07EF}"
 EndProject
+Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Analysis.Kuromoji", "src\Lucene.Net.Analysis.Kuromoji\Lucene.Net.Analysis.Kuromoji.xproj", "{87E54CA7-7394-4705-A99A-0DD638265C56}"
+EndProject
+Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Tests.Analysis.Kuromoji", "src\Lucene.Net.Tests.Analysis.Kuromoji\Lucene.Net.Tests.Analysis.Kuromoji.xproj", "{F82F0F31-09E7-48FB-B5FF-F3A84627A307}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -471,6 +475,22 @@ Global
 		{495B65F0-0B01-40FE-9DC8-5A82C49E07EF}.Release|Any CPU.Build.0 = Release|Any CPU
 		{495B65F0-0B01-40FE-9DC8-5A82C49E07EF}.Release|x86.ActiveCfg = Release|Any CPU
 		{495B65F0-0B01-40FE-9DC8-5A82C49E07EF}.Release|x86.Build.0 = Release|Any CPU
+		{87E54CA7-7394-4705-A99A-0DD638265C56}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{87E54CA7-7394-4705-A99A-0DD638265C56}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{87E54CA7-7394-4705-A99A-0DD638265C56}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{87E54CA7-7394-4705-A99A-0DD638265C56}.Debug|x86.Build.0 = Debug|Any CPU
+		{87E54CA7-7394-4705-A99A-0DD638265C56}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{87E54CA7-7394-4705-A99A-0DD638265C56}.Release|Any CPU.Build.0 = Release|Any CPU
+		{87E54CA7-7394-4705-A99A-0DD638265C56}.Release|x86.ActiveCfg = Release|Any CPU
+		{87E54CA7-7394-4705-A99A-0DD638265C56}.Release|x86.Build.0 = Release|Any CPU
+		{F82F0F31-09E7-48FB-B5FF-F3A84627A307}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{F82F0F31-09E7-48FB-B5FF-F3A84627A307}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{F82F0F31-09E7-48FB-B5FF-F3A84627A307}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{F82F0F31-09E7-48FB-B5FF-F3A84627A307}.Debug|x86.Build.0 = Debug|Any CPU
+		{F82F0F31-09E7-48FB-B5FF-F3A84627A307}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{F82F0F31-09E7-48FB-B5FF-F3A84627A307}.Release|Any CPU.Build.0 = Release|Any CPU
+		{F82F0F31-09E7-48FB-B5FF-F3A84627A307}.Release|x86.ActiveCfg = Release|Any CPU
+		{F82F0F31-09E7-48FB-B5FF-F3A84627A307}.Release|x86.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index a187ccc..5450020 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -106,6 +106,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Demo", "src\Luce
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Demo", "src\Lucene.Net.Tests.Demo\Lucene.Net.Tests.Demo.csproj", "{571B361E-B0D4-445E-A0BC-1A24AA184258}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Analysis.Kuromoji", "src\Lucene.Net.Analysis.Kuromoji\Lucene.Net.Analysis.Kuromoji.csproj", "{8408625A-2508-46D5-8519-045183C43724}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Analysis.Kuromoji", "src\Lucene.Net.Tests.Analysis.Kuromoji\Lucene.Net.Tests.Analysis.Kuromoji.csproj", "{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -1059,6 +1063,54 @@ Global
 		{571B361E-B0D4-445E-A0BC-1A24AA184258}.Release35|Mixed Platforms.Build.0 = Release|Any CPU
 		{571B361E-B0D4-445E-A0BC-1A24AA184258}.Release35|x86.ActiveCfg = Release|Any CPU
 		{571B361E-B0D4-445E-A0BC-1A24AA184258}.Release35|x86.Build.0 = Release|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Debug|x86.Build.0 = Debug|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Debug35|Any CPU.ActiveCfg = Debug|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Debug35|Any CPU.Build.0 = Debug|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Debug35|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Debug35|Mixed Platforms.Build.0 = Debug|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Debug35|x86.ActiveCfg = Debug|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Debug35|x86.Build.0 = Debug|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Release|Any CPU.Build.0 = Release|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Release|x86.ActiveCfg = Release|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Release|x86.Build.0 = Release|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Release35|Any CPU.ActiveCfg = Release|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Release35|Any CPU.Build.0 = Release|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Release35|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Release35|Mixed Platforms.Build.0 = Release|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Release35|x86.ActiveCfg = Release|Any CPU
+		{8408625A-2508-46D5-8519-045183C43724}.Release35|x86.Build.0 = Release|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Debug|x86.Build.0 = Debug|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Debug35|Any CPU.ActiveCfg = Debug|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Debug35|Any CPU.Build.0 = Debug|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Debug35|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Debug35|Mixed Platforms.Build.0 = Debug|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Debug35|x86.ActiveCfg = Debug|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Debug35|x86.Build.0 = Debug|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release|Any CPU.Build.0 = Release|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release|x86.ActiveCfg = Release|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release|x86.Build.0 = Release|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release35|Any CPU.ActiveCfg = Release|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release35|Any CPU.Build.0 = Release|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release35|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release35|Mixed Platforms.Build.0 = Release|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release35|x86.ActiveCfg = Release|Any CPU
+		{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}.Release35|x86.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index ee56fce..3d0df77 100644
--- a/README.md
+++ b/README.md
@@ -41,8 +41,13 @@ PM> Install-Package Lucene.Net -Pre
 ```
 
 As of 4.8.0, Lucene.Net is now divided into several specialized sub-packages, all available on NuGet.
-<!--- TO BE ADDED WHEN RELEASED - [Lucene.Net.Analysis.Phonetic](https://www.nuget.org/packages/Lucene.Net.Analysis.Phonetic/) - Analyzer for indexing phonetic signatures (for sounds-alike search) ) 
+
+<!--- TO BE ADDED WHEN RELEASED 
+- [Lucene.Net.Analysis.Kuromoji](https://www.nuget.org/packages/Lucene.Net.Analysis.Kuromoji/) - Japanese Morphological Analyzer 
+- [Lucene.Net.Analysis.Phonetic](https://www.nuget.org/packages/Lucene.Net.Analysis.Phonetic/) - Analyzer for indexing phonetic signatures (for sounds-alike search)
 - [Lucene.Net.Analysis.SmartCn](https://www.nuget.org/packages/Lucene.Net.Analysis.SmartCn/) - Analyzer for indexing Chinese)-->
+
+
 - [Lucene.Net](https://www.nuget.org/packages/Lucene.Net/) - Core library
 - [Lucene.Net.Analysis.Common](https://www.nuget.org/packages/Lucene.Net.Analysis.Common/) - Analyzers for indexing content in different languages and domains
 - [Lucene.Net.Analysis.Stempel](https://www.nuget.org/packages/Lucene.Net.Analysis.Stempel/) - Analyzer for indexing Polish
@@ -52,7 +57,7 @@ As of 4.8.0, Lucene.Net is now divided into several specialized sub-packages, al
 - [Lucene.Net.Facet](https://www.nuget.org/packages/Lucene.Net.Facet/) - Faceted indexing and search capabilities
 - [Lucene.Net.Grouping](https://www.nuget.org/packages/Lucene.Net.Grouping/) - Collectors for grouping search results
 - [Lucene.Net.Highlighter](https://www.nuget.org/packages/Lucene.Net.Highlighter/) - Highlights search keywords in results
-- [Lucene.Net.ICU](https://www.nuget.org/packages/Lucene.Net.ICU/) - Specialized international support for languages that don't space words
+- [Lucene.Net.ICU](https://www.nuget.org/packages/Lucene.Net.ICU/) - Specialized ICU (International Components for Unicode) Analyzers and Highlighters
 - [Lucene.Net.Join](https://www.nuget.org/packages/Lucene.Net.Join/) - Index-time and Query-time joins for normalized content
 - [Lucene.Net.Memory](https://www.nuget.org/packages/Lucene.Net.Memory/) - Single-document in-memory index implementation
 - [Lucene.Net.Misc](https://www.nuget.org/packages/Lucene.Net.Misc/) - Index tools and other miscellaneous code

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/BinaryDictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/BinaryDictionary.cs b/src/Lucene.Net.Analysis.Kuromoji/Dict/BinaryDictionary.cs
new file mode 100644
index 0000000..4068e38
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Dict/BinaryDictionary.cs
@@ -0,0 +1,330 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Support.IO;
+using Lucene.Net.Util;
+using System;
+using System.IO;
+using System.Reflection;
+
+namespace Lucene.Net.Analysis.Ja.Dict
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Base class for a binary-encoded in-memory dictionary.
+    /// </summary>
+    public abstract class BinaryDictionary : IDictionary
+    {
+        public static readonly string DICT_FILENAME_SUFFIX = "$buffer.dat";
+        public static readonly string TARGETMAP_FILENAME_SUFFIX = "$targetMap.dat";
+        public static readonly string POSDICT_FILENAME_SUFFIX = "$posDict.dat";
+
+        public static readonly string DICT_HEADER = "kuromoji_dict";
+        public static readonly string TARGETMAP_HEADER = "kuromoji_dict_map";
+        public static readonly string POSDICT_HEADER = "kuromoji_dict_pos";
+        public static readonly int VERSION = 1;
+
+        private readonly ByteBuffer buffer;
+        private readonly int[] targetMapOffsets, targetMap;
+        private readonly string[] posDict;
+        private readonly string[] inflTypeDict;
+        private readonly string[] inflFormDict;
+
+        /// <summary>
+        /// Loads the three embedded resources that belong to the concrete dictionary type
+        /// (target map, part-of-speech dictionary and the word-entry buffer), checking the
+        /// codec header of each one before decoding it.
+        /// </summary>
+        /// <exception cref="IOException">The number of source IDs found in the target map does not match its declared size.</exception>
+        /// <exception cref="EndOfStreamException">The dictionary resource ends before its declared size has been read.</exception>
+        protected BinaryDictionary()
+        {
+            int[] targetMapOffsets = null, targetMap = null;
+            string[] posDict = null;
+            string[] inflFormDict = null;
+            string[] inflTypeDict = null;
+
+            using (Stream mapIS = GetResource(TARGETMAP_FILENAME_SUFFIX))
+            {
+                DataInput @in = new InputStreamDataInput(mapIS);
+                CodecUtil.CheckHeader(@in, TARGETMAP_HEADER, VERSION, VERSION);
+                targetMap = new int[@in.ReadVInt32()];
+                targetMapOffsets = new int[@in.ReadVInt32()];
+                int accum = 0, sourceId = 0;
+                for (int ofs = 0; ofs < targetMap.Length; ofs++)
+                {
+                    int val = @in.ReadVInt32();
+                    // the low bit marks the first target of a new source ID
+                    if ((val & 0x01) != 0)
+                    {
+                        targetMapOffsets[sourceId] = ofs;
+                        sourceId++;
+                    }
+                    // The remaining bits are a delta. The shift must happen on the unsigned
+                    // value (logical shift); casting back to int before shifting would make
+                    // it an arithmetic shift and the casts a no-op.
+                    accum += (int)((uint)val >> 1);
+                    targetMap[ofs] = accum;
+                }
+                if (sourceId + 1 != targetMapOffsets.Length)
+                {
+                    throw new IOException("targetMap file format broken");
+                }
+                // extra trailing entry pointing behind the last target
+                targetMapOffsets[sourceId] = targetMap.Length;
+            }
+
+            using (Stream posIS = GetResource(POSDICT_FILENAME_SUFFIX))
+            {
+                DataInput @in = new InputStreamDataInput(posIS);
+                CodecUtil.CheckHeader(@in, POSDICT_HEADER, VERSION, VERSION);
+                int posSize = @in.ReadVInt32();
+                posDict = new string[posSize];
+                inflTypeDict = new string[posSize];
+                inflFormDict = new string[posSize];
+                for (int j = 0; j < posSize; j++)
+                {
+                    posDict[j] = @in.ReadString();
+                    inflTypeDict[j] = @in.ReadString();
+                    inflFormDict[j] = @in.ReadString();
+                    // this is how we encode null inflections
+                    if (inflTypeDict[j].Length == 0)
+                    {
+                        inflTypeDict[j] = null;
+                    }
+                    if (inflFormDict[j].Length == 0)
+                    {
+                        inflFormDict[j] = null;
+                    }
+                }
+            }
+
+            ByteBuffer tmpBuffer;
+
+            using (Stream dictIS = GetResource(DICT_FILENAME_SUFFIX))
+            {
+                // no buffering here, as we load in one large buffer
+                DataInput @in = new InputStreamDataInput(dictIS);
+                CodecUtil.CheckHeader(@in, DICT_HEADER, VERSION, VERSION);
+                int size = @in.ReadVInt32();
+                tmpBuffer = ByteBuffer.Allocate(size); // AllocateDirect..?
+
+                // Stream.Read may legally return fewer bytes than requested without being
+                // at the end of the stream, so keep reading until the buffer is full.
+                int filled = 0;
+                while (filled < size)
+                {
+                    int read = dictIS.Read(tmpBuffer.Array, filled, size - filled);
+                    if (read <= 0)
+                    {
+                        throw new EndOfStreamException("Cannot read whole dictionary");
+                    }
+                    filled += read;
+                }
+            }
+
+            this.targetMap = targetMap;
+            this.targetMapOffsets = targetMapOffsets;
+            this.posDict = posDict;
+            this.inflTypeDict = inflTypeDict;
+            this.inflFormDict = inflFormDict;
+            this.buffer = tmpBuffer.AsReadOnlyBuffer();
+        }
+
+        /// <summary>
+        /// Opens the embedded resource of the runtime type of this instance that ends with <paramref name="suffix"/>.
+        /// </summary>
+        /// <param name="suffix">File name suffix appended to the type name.</param>
+        /// <returns>The stream produced by <see cref="GetTypeResource(Type, string)"/>.</returns>
+        protected Stream GetResource(string suffix)
+        {
+            Type runtimeType = GetType();
+            return GetTypeResource(runtimeType, suffix);
+        }
+
+        // util, reused by ConnectionCosts and CharacterDefinition
+        /// <summary>
+        /// Looks up the manifest resource named after <paramref name="clazz"/> (its simple name
+        /// followed by <paramref name="suffix"/>) in the assembly that declares the type.
+        /// </summary>
+        /// <param name="clazz">Type whose assembly and name are used for the lookup.</param>
+        /// <param name="suffix">Suffix appended to the type name.</param>
+        /// <returns>The resource stream; never <c>null</c>.</returns>
+        /// <exception cref="FileNotFoundException">No matching resource was found.</exception>
+        public static Stream GetTypeResource(Type clazz, string suffix)
+        {
+            string resourceName = clazz.Name + suffix;
+            Stream resource = clazz.GetTypeInfo().Assembly.FindAndGetManifestResourceStream(clazz, resourceName);
+            if (resource != null)
+            {
+                return resource;
+            }
+            throw new FileNotFoundException("Not in assembly: " + clazz.FullName + suffix);
+        }
+
+        /// <summary>
+        /// Points <paramref name="ref"/> at the slice of the target map that holds the word IDs
+        /// for <paramref name="sourceId"/>. No data is copied; the shared array is exposed.
+        /// </summary>
+        /// <param name="sourceId">Index into the target map offsets.</param>
+        /// <param name="ref">Receives the array, start offset and length of the slice.</param>
+        public virtual void LookupWordIds(int sourceId, Int32sRef @ref)
+        {
+            @ref.Int32s = targetMap;
+            int start = targetMapOffsets[sourceId];
+            @ref.Offset = start;
+            // the offsets array carries one extra entry pointing behind the last target
+            int end = targetMapOffsets[sourceId + 1];
+            @ref.Length = end - start;
+        }
+
+        /// <summary>
+        /// Returns the ID stored above the three flag bits in the 16-bit header of the entry.
+        /// </summary>
+        /// <param name="wordId">Offset of the word entry in the dictionary buffer.</param>
+        /// <returns>The header value shifted right by three bits; never negative.</returns>
+        public virtual int GetLeftId(int wordId)
+        {
+            // Treat the header as an unsigned 16-bit value before shifting. Casting back to
+            // short first (as in "(short)((ushort)x) >> 3") undoes the unsigned cast and lets
+            // the sign bit leak into the result.
+            return (ushort)buffer.GetInt16(wordId) >> 3;
+        }
+
+        /// <summary>
+        /// Returns the ID stored above the three flag bits in the 16-bit header of the entry.
+        /// In this implementation it is read from the same header bits as the left ID.
+        /// </summary>
+        /// <param name="wordId">Offset of the word entry in the dictionary buffer.</param>
+        /// <returns>The header value shifted right by three bits; never negative.</returns>
+        public virtual int GetRightId(int wordId)
+        {
+            // Treat the header as an unsigned 16-bit value before shifting. Casting back to
+            // short first (as in "(short)((ushort)x) >> 3") undoes the unsigned cast and lets
+            // the sign bit leak into the result.
+            return (ushort)buffer.GetInt16(wordId) >> 3;
+        }
+
+        /// <summary>
+        /// Reads the 16-bit word cost that follows the two-byte ID/flags header of the entry.
+        /// </summary>
+        /// <param name="wordId">Offset of the word entry in the dictionary buffer.</param>
+        /// <returns>The stored cost, widened to <see cref="int"/>.</returns>
+        public virtual int GetWordCost(int wordId)
+        {
+            int costOffset = wordId + 2; // step over the ID/flags header
+            short cost = buffer.GetInt16(costOffset);
+            return cost;
+        }
+
+        /// <summary>
+        /// Rebuilds the base form of an entry from a prefix of the surface form plus a suffix
+        /// stored in the dictionary buffer.
+        /// </summary>
+        /// <param name="wordId">Offset of the word entry in the dictionary buffer.</param>
+        /// <param name="surfaceForm">Array containing the surface form.</param>
+        /// <param name="off">Index in <paramref name="surfaceForm"/> where the surface form starts.</param>
+        /// <param name="len">Length of the surface form; not used by this implementation.</param>
+        /// <returns>The base form, or <c>null</c> when the entry has no base form data.</returns>
+        public virtual string GetBaseForm(int wordId, char[] surfaceForm, int off, int len)
+        {
+            if (!HasBaseFormData(wordId))
+            {
+                return null;
+            }
+
+            int cursor = BaseFormOffset(wordId);
+            int header = buffer.Get(cursor) & 0xff;
+            cursor++;
+
+            // header is within 0..255, so a plain shift yields the same result as an unsigned one
+            int prefixLength = header >> 4;
+            int suffixLength = header & 0xF;
+
+            char[] result = new char[prefixLength + suffixLength];
+            System.Array.Copy(surfaceForm, off, result, 0, prefixLength);
+            for (int k = 0; k < suffixLength; k++)
+            {
+                // stored suffix characters occupy two bytes each
+                result[prefixLength + k] = buffer.GetChar(cursor + (k << 1));
+            }
+            return new string(result);
+        }
+
+        /// <summary>
+        /// Returns the reading of an entry: the stored reading when the entry has one, otherwise
+        /// the surface form with characters in the range U+3041..U+3096 shifted up by 0x60.
+        /// </summary>
+        /// <param name="wordId">Offset of the word entry in the dictionary buffer.</param>
+        /// <param name="surface">Array containing the surface form.</param>
+        /// <param name="off">Index in <paramref name="surface"/> where the surface form starts.</param>
+        /// <param name="len">Number of characters of the surface form.</param>
+        /// <returns>The reading as a new string.</returns>
+        public virtual string GetReading(int wordId, char[] surface, int off, int len)
+        {
+            if (HasReadingData(wordId))
+            {
+                int cursor = ReadingOffset(wordId);
+                int header = buffer.Get(cursor) & 0xff;
+                // low bit selects the one-byte kana encoding, the remaining bits are the length
+                bool kana = (header & 1) == 1;
+                return ReadString(cursor + 1, header >> 1, kana);
+            }
+
+            // the reading is the surface form, with hiragana shifted to katakana
+            char[] result = new char[len];
+            for (int k = 0; k < len; k++)
+            {
+                char ch = surface[off + k];
+                result[k] = (ch > 0x3040 && ch < 0x3097) ? (char)(ch + 0x60) : ch;
+            }
+            return new string(result);
+        }
+
+        /// <summary>
+        /// Looks up the part-of-speech string for an entry, indexed by the entry's left ID.
+        /// </summary>
+        /// <param name="wordId">Offset of the word entry in the dictionary buffer.</param>
+        /// <returns>The part-of-speech string loaded from the POS dictionary.</returns>
+        public virtual string GetPartOfSpeech(int wordId)
+        {
+            int leftId = GetLeftId(wordId);
+            return posDict[leftId];
+        }
+
+        /// <summary>
+        /// Returns the pronunciation of an entry: the stored pronunciation when the entry has
+        /// one, otherwise the result of <see cref="GetReading(int, char[], int, int)"/>.
+        /// </summary>
+        /// <param name="wordId">Offset of the word entry in the dictionary buffer.</param>
+        /// <param name="surface">Array containing the surface form.</param>
+        /// <param name="off">Index in <paramref name="surface"/> where the surface form starts.</param>
+        /// <param name="len">Number of characters of the surface form.</param>
+        /// <returns>The pronunciation string.</returns>
+        public virtual string GetPronunciation(int wordId, char[] surface, int off, int len)
+        {
+            if (!HasPronunciationData(wordId))
+            {
+                // same as the reading
+                return GetReading(wordId, surface, off, len);
+            }
+
+            int cursor = PronunciationOffset(wordId);
+            int header = buffer.Get(cursor) & 0xff;
+            // low bit selects the one-byte kana encoding, the remaining bits are the length
+            bool kana = (header & 1) == 1;
+            return ReadString(cursor + 1, header >> 1, kana);
+        }
+
+        /// <summary>
+        /// Looks up the inflection type for an entry, indexed by the entry's left ID.
+        /// </summary>
+        /// <param name="wordId">Offset of the word entry in the dictionary buffer.</param>
+        /// <returns>The inflection type, or <c>null</c> when it was stored as an empty string.</returns>
+        public virtual string GetInflectionType(int wordId)
+        {
+            int leftId = GetLeftId(wordId);
+            return inflTypeDict[leftId];
+        }
+
+        /// <summary>
+        /// Looks up the inflection form for an entry, indexed by the entry's left ID.
+        /// </summary>
+        /// <param name="wordId">Offset of the word entry in the dictionary buffer.</param>
+        /// <returns>The inflection form, or <c>null</c> when it was stored as an empty string.</returns>
+        public virtual string GetInflectionForm(int wordId)
+        {
+            int leftId = GetLeftId(wordId);
+            return inflFormDict[leftId];
+        }
+
+        /// <summary>
+        /// Computes where the optional base form data of an entry starts: right after the
+        /// two-byte ID/flags header and the two-byte word cost.
+        /// </summary>
+        private static int BaseFormOffset(int wordId)
+        {
+            const int fixedHeaderBytes = 4;
+            return wordId + fixedHeaderBytes;
+        }
+
+        /// <summary>
+        /// Computes where the reading data of an entry starts, skipping the base form data
+        /// when the entry has any.
+        /// </summary>
+        private int ReadingOffset(int wordId)
+        {
+            int start = BaseFormOffset(wordId);
+            if (!HasBaseFormData(wordId))
+            {
+                return start;
+            }
+
+            // low nibble of the header byte is the number of stored suffix characters
+            int suffixChars = buffer.Get(start) & 0xf;
+            // one header byte plus two bytes per stored character
+            return start + 1 + (suffixChars << 1);
+        }
+
+        /// <summary>
+        /// Computes where the pronunciation data of an entry starts, skipping the reading data
+        /// when the entry has any.
+        /// </summary>
+        private int PronunciationOffset(int wordId)
+        {
+            if (!HasReadingData(wordId))
+            {
+                return ReadingOffset(wordId);
+            }
+
+            int cursor = ReadingOffset(wordId);
+            int header = buffer.Get(cursor) & 0xff;
+            // Kana bit clear: UTF-16 data, so masking off the kana bit gives the byte length.
+            // Kana bit set: one byte per character, so the length is the header shifted by one.
+            int byteLength = (header & 1) == 0 ? (header & 0xfe) : (header >> 1);
+            return cursor + 1 + byteLength;
+        }
+
+        /// <summary>Tests the <see cref="HAS_BASEFORM"/> bit in the 16-bit header of the entry.</summary>
+        private bool HasBaseFormData(int wordId)
+        {
+            int header = buffer.GetInt16(wordId);
+            return (header & HAS_BASEFORM) != 0;
+        }
+
+        /// <summary>Tests the <see cref="HAS_READING"/> bit in the 16-bit header of the entry.</summary>
+        private bool HasReadingData(int wordId)
+        {
+            int header = buffer.GetInt16(wordId);
+            return (header & HAS_READING) != 0;
+        }
+
+        /// <summary>Tests the <see cref="HAS_PRONUNCIATION"/> bit in the 16-bit header of the entry.</summary>
+        private bool HasPronunciationData(int wordId)
+        {
+            int header = buffer.GetInt16(wordId);
+            return (header & HAS_PRONUNCIATION) != 0;
+        }
+
+        /// <summary>
+        /// Decodes <paramref name="length"/> characters from the dictionary buffer starting at
+        /// <paramref name="offset"/>.
+        /// </summary>
+        /// <param name="offset">Buffer position of the first encoded character.</param>
+        /// <param name="length">Number of characters to decode.</param>
+        /// <param name="kana">
+        /// <c>true</c> when each character is stored as one byte added to 0x30A0;
+        /// <c>false</c> when each character is stored as a two-byte char.
+        /// </param>
+        /// <returns>The decoded string.</returns>
+        private string ReadString(int offset, int length, bool kana)
+        {
+            char[] chars = new char[length];
+            for (int k = 0; k < length; k++)
+            {
+                chars[k] = kana
+                    ? (char)(0x30A0 + (buffer.Get(offset + k) & 0xff))
+                    : buffer.GetChar(offset + (k << 1));
+            }
+            return new string(chars);
+        }
+
+        /// <summary>flag that the entry has baseform data. otherwise its not inflected (same as surface form)</summary>
+        public static readonly int HAS_BASEFORM = 1;
+        /// <summary>flag that the entry has reading data. otherwise reading is surface form converted to katakana</summary>
+        public static readonly int HAS_READING = 2;
+        /// <summary>flag that the entry has pronunciation data. otherwise pronunciation is the reading</summary>
+        public static readonly int HAS_PRONUNCIATION = 4;
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/CharacterDefinition.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/CharacterDefinition.cs b/src/Lucene.Net.Analysis.Kuromoji/Dict/CharacterDefinition.cs
new file mode 100644
index 0000000..2821941
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Dict/CharacterDefinition.cs
@@ -0,0 +1,124 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Store;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja.Dict
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Character category data.
+    /// </summary>
+    public sealed class CharacterDefinition
+    {
+        public static readonly string FILENAME_SUFFIX = ".dat";
+        public static readonly string HEADER = "kuromoji_cd";
+        public static readonly int VERSION = 1;
+
+        public static readonly int CLASS_COUNT = Enum.GetValues(typeof(CharacterClass)).Length;
+
+        // only used internally for lookup:
+        private enum CharacterClass : byte
+        {
+            NGRAM, DEFAULT, SPACE, SYMBOL, NUMERIC, ALPHA, CYRILLIC, GREEK, HIRAGANA, KATAKANA, KANJI, KANJINUMERIC
+        }
+
+        private readonly byte[] characterCategoryMap = new byte[0x10000];
+
+        private readonly bool[] invokeMap = new bool[CLASS_COUNT];
+        private readonly bool[] groupMap = new bool[CLASS_COUNT];
+
+        // the classes:
+        public static readonly byte NGRAM = (byte)CharacterClass.NGRAM;
+        public static readonly byte DEFAULT = (byte)CharacterClass.DEFAULT;
+        public static readonly byte SPACE = (byte)CharacterClass.SPACE;
+        public static readonly byte SYMBOL = (byte)CharacterClass.SYMBOL;
+        public static readonly byte NUMERIC = (byte)CharacterClass.NUMERIC;
+        public static readonly byte ALPHA = (byte)CharacterClass.ALPHA;
+        public static readonly byte CYRILLIC = (byte)CharacterClass.CYRILLIC;
+        public static readonly byte GREEK = (byte)CharacterClass.GREEK;
+        public static readonly byte HIRAGANA = (byte)CharacterClass.HIRAGANA;
+        public static readonly byte KATAKANA = (byte)CharacterClass.KATAKANA;
+        public static readonly byte KANJI = (byte)CharacterClass.KANJI;
+        public static readonly byte KANJINUMERIC = (byte)CharacterClass.KANJINUMERIC;
+
+        /// <summary>
+        /// Loads the character category map and the per-class invoke/group flags from the
+        /// embedded resource of this type, after checking its codec header.
+        /// </summary>
+        private CharacterDefinition()
+        {
+            using (Stream resource = BinaryDictionary.GetTypeResource(GetType(), FILENAME_SUFFIX))
+            {
+                DataInput input = new InputStreamDataInput(resource);
+                CodecUtil.CheckHeader(input, HEADER, VERSION, VERSION);
+                input.ReadBytes(characterCategoryMap, 0, characterCategoryMap.Length);
+
+                // one flag byte per character class: bit 0 = invoke, bit 1 = group
+                for (int classIndex = 0; classIndex < CLASS_COUNT; classIndex++)
+                {
+                    int flags = input.ReadByte();
+                    invokeMap[classIndex] = (flags & 0x01) != 0;
+                    groupMap[classIndex] = (flags & 0x02) != 0;
+                }
+            }
+        }
+
+        /// <summary>Returns the character class recorded for <paramref name="c"/> in the category map.</summary>
+        /// <param name="c">The UTF-16 code unit to classify.</param>
+        /// <returns>The class value stored at index <paramref name="c"/>.</returns>
+        public byte GetCharacterClass(char c)
+        {
+            byte characterClass = characterCategoryMap[c];
+            return characterClass;
+        }
+
+        /// <summary>Returns the invoke flag of the character class that <paramref name="c"/> belongs to.</summary>
+        /// <param name="c">The UTF-16 code unit to look up.</param>
+        public bool IsInvoke(char c)
+        {
+            byte characterClass = characterCategoryMap[c];
+            return invokeMap[characterClass];
+        }
+
+        /// <summary>Returns the group flag of the character class that <paramref name="c"/> belongs to.</summary>
+        /// <param name="c">The UTF-16 code unit to look up.</param>
+        public bool IsGroup(char c)
+        {
+            byte characterClass = characterCategoryMap[c];
+            return groupMap[characterClass];
+        }
+
+        /// <summary>
+        /// Tells whether <paramref name="c"/> is classified as <see cref="KANJI"/> or
+        /// <see cref="KANJINUMERIC"/>.
+        /// </summary>
+        /// <param name="c">The UTF-16 code unit to look up.</param>
+        public bool IsKanji(char c)
+        {
+            byte cls = characterCategoryMap[c];
+            if (cls == KANJI)
+            {
+                return true;
+            }
+            return cls == KANJINUMERIC;
+        }
+
+        /// <summary>
+        /// Converts a character class name to its numeric value. The name is parsed against the
+        /// internal enum, ignoring case.
+        /// </summary>
+        /// <param name="characterClassName">Name of the character class.</param>
+        /// <returns>The numeric value of the matching class.</returns>
+        public static byte LookupCharacterClass(string characterClassName)
+        {
+            object parsed = Enum.Parse(typeof(CharacterClass), characterClassName, true);
+            return (byte)parsed;
+        }
+
+        /// <summary>Returns the shared <see cref="CharacterDefinition"/> held by the singleton holder.</summary>
+        public static CharacterDefinition GetInstance()
+        {
+            CharacterDefinition shared = SingletonHolder.INSTANCE;
+            return shared;
+        }
+
+        // Holder whose static constructor creates the single CharacterDefinition instance
+        // that GetInstance() returns.
+        private class SingletonHolder
+        {
+            // Assigned exactly once, in the static constructor below.
+            internal static readonly CharacterDefinition INSTANCE;
+            static SingletonHolder()
+            {
+                try
+                {
+                    INSTANCE = new CharacterDefinition();
+                }
+                catch (IOException ioe)
+                {
+                    // Wrap the I/O failure, keeping the original exception as the inner exception.
+                    throw new Exception("Cannot load CharacterDefinition.", ioe);
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/CharacterDefinition.dat
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/CharacterDefinition.dat b/src/Lucene.Net.Analysis.Kuromoji/Dict/CharacterDefinition.dat
new file mode 100644
index 0000000..4b8bd4b
Binary files /dev/null and b/src/Lucene.Net.Analysis.Kuromoji/Dict/CharacterDefinition.dat differ

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/ConnectionCosts.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/ConnectionCosts.cs b/src/Lucene.Net.Analysis.Kuromoji/Dict/ConnectionCosts.cs
new file mode 100644
index 0000000..02d8eb5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Dict/ConnectionCosts.cs
@@ -0,0 +1,90 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja.Dict
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// n-gram connection cost data, loaded once from the embedded
+    /// <c>ConnectionCosts.dat</c> resource and exposed as a singleton.
+    /// </summary>
+    public sealed class ConnectionCosts
+    {
+        public static readonly string FILENAME_SUFFIX = ".dat";
+        public static readonly string HEADER = "kuromoji_cc";
+        public static readonly int VERSION = 1;
+
+        // Indexed as costs[backwardId][forwardId]: the array is backward IDs first since
+        // Get is called using the same backward ID consecutively. maybe doesn't matter.
+        private readonly short[][] costs;
+
+        /// <summary>
+        /// Reads the cost matrix from the resource stream: a codec header, the forward and
+        /// backward sizes, then one delta-encoded value per cell.
+        /// </summary>
+        private ConnectionCosts()
+        {
+            short[][] costs = null;
+
+            using (Stream @is = BinaryDictionary.GetTypeResource(GetType(), FILENAME_SUFFIX))
+            {
+                DataInput @in = new InputStreamDataInput(@is);
+                CodecUtil.CheckHeader(@in, HEADER, VERSION, VERSION);
+                int forwardSize = @in.ReadVInt32();
+                int backwardSize = @in.ReadVInt32();
+                costs = RectangularArrays.ReturnRectangularArray<short>(backwardSize, forwardSize);
+
+                // Each stored value is the zig-zag encoded difference from the previous cell,
+                // so a running sum is carried across the whole matrix.
+                int accum = 0;
+                for (int j = 0; j < costs.Length; j++)
+                {
+                    short[] a = costs[j];
+                    for (int i = 0; i < a.Length; i++)
+                    {
+                        int raw = @in.ReadVInt32();
+                        // The shift has to be logical (Java's >>>), so it is applied while the
+                        // value is still a uint. Casting back to int *before* shifting would make
+                        // it an arithmetic shift and give wrong results when the top bit is set.
+                        accum += ((int)((uint)raw >> 1)) ^ -(raw & 1);
+                        a[i] = (short)accum;
+                    }
+                }
+            }
+
+            this.costs = costs;
+        }
+
+        /// <summary>
+        /// Returns the connection cost stored for the given pair of IDs.
+        /// </summary>
+        /// <param name="forwardId">Index into the second (forward) dimension of the matrix.</param>
+        /// <param name="backwardId">Index into the first (backward) dimension of the matrix.</param>
+        /// <returns>The stored cost.</returns>
+        public int Get(int forwardId, int backwardId)
+        {
+            return costs[backwardId][forwardId];
+        }
+
+        /// <summary>
+        /// Returns the shared <see cref="ConnectionCosts"/> instance.
+        /// </summary>
+        public static ConnectionCosts GetInstance()
+        {
+            return SingletonHolder.INSTANCE;
+        }
+
+        // Holds the singleton; it is created in the static constructor.
+        private static class SingletonHolder
+        {
+            internal static readonly ConnectionCosts INSTANCE;
+            static SingletonHolder()
+            {
+                try
+                {
+                    INSTANCE = new ConnectionCosts();
+                }
+                catch (IOException ioe)
+                {
+                    throw new Exception("Cannot load ConnectionCosts.", ioe);
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/ConnectionCosts.dat
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/ConnectionCosts.dat b/src/Lucene.Net.Analysis.Kuromoji/Dict/ConnectionCosts.dat
new file mode 100644
index 0000000..7679f14
Binary files /dev/null and b/src/Lucene.Net.Analysis.Kuromoji/Dict/ConnectionCosts.dat differ

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/Dictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/Dictionary.cs b/src/Lucene.Net.Analysis.Kuromoji/Dict/Dictionary.cs
new file mode 100644
index 0000000..4c24a4e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Dict/Dictionary.cs
@@ -0,0 +1,106 @@
+namespace Lucene.Net.Analysis.Ja.Dict
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Dictionary interface for retrieving morphological data
+    /// by id.
+    /// </summary>
+    public interface IDictionary
+    {
+        /// <summary>
+        /// Get left id of specified word.
+        /// </summary>
+        /// <param name="wordId">Word ID of token.</param>
+        /// <returns>Left id.</returns>
+        int GetLeftId(int wordId);
+
+        /// <summary>
+        /// Get right id of specified word.
+        /// </summary>
+        /// <param name="wordId">Word ID of token.</param>
+        /// <returns>Right id.</returns>
+        int GetRightId(int wordId);
+
+        /// <summary>
+        /// Get word cost of specified word.
+        /// </summary>
+        /// <param name="wordId">Word ID of token.</param>
+        /// <returns>Word's cost.</returns>
+        int GetWordCost(int wordId);
+
+        /// <summary>
+        /// Get Part-Of-Speech of tokens.
+        /// </summary>
+        /// <param name="wordId">Word ID of token.</param>
+        /// <returns>Part-Of-Speech of the token.</returns>
+        string GetPartOfSpeech(int wordId);
+
+        /// <summary>
+        /// Get reading of tokens.
+        /// </summary>
+        /// <param name="wordId">Word ID of token.</param>
+        /// <param name="surface">Character buffer, presumably holding the token's surface text (TODO confirm against implementations).</param>
+        /// <param name="off">Presumably the start offset of the token within <paramref name="surface"/>.</param>
+        /// <param name="len">Presumably the number of characters of the token within <paramref name="surface"/>.</param>
+        /// <returns>Reading of the token.</returns>
+        string GetReading(int wordId, char[] surface, int off, int len);
+
+        /// <summary>
+        /// Get base form of word.
+        /// </summary>
+        /// <param name="wordId">Word ID of token.</param>
+        /// <param name="surface">Character buffer, presumably holding the token's surface text (TODO confirm against implementations).</param>
+        /// <param name="off">Presumably the start offset of the token within <paramref name="surface"/>.</param>
+        /// <param name="len">Presumably the number of characters of the token within <paramref name="surface"/>.</param>
+        /// <returns>Base form (only different for inflected words, otherwise null).</returns>
+        string GetBaseForm(int wordId, char[] surface, int off, int len);
+
+        /// <summary>
+        /// Get pronunciation of tokens.
+        /// </summary>
+        /// <param name="wordId">Word ID of token.</param>
+        /// <param name="surface">Character buffer, presumably holding the token's surface text (TODO confirm against implementations).</param>
+        /// <param name="off">Presumably the start offset of the token within <paramref name="surface"/>.</param>
+        /// <param name="len">Presumably the number of characters of the token within <paramref name="surface"/>.</param>
+        /// <returns>Pronunciation of the token.</returns>
+        string GetPronunciation(int wordId, char[] surface, int off, int len);
+
+        /// <summary>
+        /// Get inflection type of tokens.
+        /// </summary>
+        /// <param name="wordId">Word ID of token.</param>
+        /// <returns>Inflection type, or null.</returns>
+        string GetInflectionType(int wordId);
+
+        /// <summary>
+        /// Get inflection form of tokens.
+        /// </summary>
+        /// <param name="wordId">Word ID of token.</param>
+        /// <returns>Inflection form, or null.</returns>
+        string GetInflectionForm(int wordId);
+        // TODO: maybe we should have an optional method, a non-typesafe
+        // 'getAdditionalData' if other dictionaries like unidic have additional data
+    }
+
+    // LUCENENET TODO: Make this whole thing into an abstract class??
+    /// <summary>
+    /// Holds the separator constant used when several features of a dictionary
+    /// entry are packed into a single string.
+    /// </summary>
+    public class Dictionary
+    {
+        /// <summary>
+        /// Separator between packed features: a single NUL character (U+0000).
+        /// </summary>
+        public static readonly string INTERNAL_SEPARATOR = "\u0000";
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$buffer.dat
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$buffer.dat b/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$buffer.dat
new file mode 100644
index 0000000..dcf430a
Binary files /dev/null and b/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$buffer.dat differ

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$fst.dat
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$fst.dat b/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$fst.dat
new file mode 100644
index 0000000..ea5c43c
Binary files /dev/null and b/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$fst.dat differ

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$posDict.dat
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$posDict.dat b/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$posDict.dat
new file mode 100644
index 0000000..e727d90
Binary files /dev/null and b/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$posDict.dat differ

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$targetMap.dat
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$targetMap.dat b/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$targetMap.dat
new file mode 100644
index 0000000..0e27345
Binary files /dev/null and b/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary$targetMap.dat differ

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary.cs b/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary.cs
new file mode 100644
index 0000000..d46312b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoDictionary.cs
@@ -0,0 +1,72 @@
+using Lucene.Net.Store;
+using Lucene.Net.Util.Fst;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja.Dict
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Binary dictionary implementation for a known-word dictionary model:
+    /// words are encoded into an FST that maps to a list of wordIDs.
+    /// </summary>
+    public sealed class TokenInfoDictionary : BinaryDictionary
+    {
+        public static readonly string FST_FILENAME_SUFFIX = "$fst.dat";
+
+        private readonly TokenInfoFST fst;
+
+        /// <summary>
+        /// Builds the dictionary by reading the FST resource and wrapping it.
+        /// </summary>
+        private TokenInfoDictionary()
+        {
+            this.fst = ReadTokenInfoFST();
+        }
+
+        /// <summary>
+        /// Reads the FST from the resource with suffix <see cref="FST_FILENAME_SUFFIX"/> and
+        /// wraps it in a <see cref="TokenInfoFST"/>.
+        /// </summary>
+        private TokenInfoFST ReadTokenInfoFST()
+        {
+            FST<long?> loaded;
+            using (Stream input = GetResource(FST_FILENAME_SUFFIX))
+            {
+                loaded = new FST<long?>(new InputStreamDataInput(input), PositiveInt32Outputs.Singleton);
+            }
+            // TODO: some way to configure?
+            return new TokenInfoFST(loaded, true);
+        }
+
+        /// <summary>
+        /// The wrapped FST used for word lookups.
+        /// </summary>
+        public TokenInfoFST FST
+        {
+            get { return fst; }
+        }
+
+        /// <summary>
+        /// Returns the shared <see cref="TokenInfoDictionary"/> instance.
+        /// </summary>
+        public static TokenInfoDictionary GetInstance()
+        {
+            return SingletonHolder.INSTANCE;
+        }
+
+        // Holds the singleton; it is created in the static constructor.
+        private class SingletonHolder
+        {
+            internal static readonly TokenInfoDictionary INSTANCE;
+
+            static SingletonHolder()
+            {
+                TokenInfoDictionary loaded;
+                try
+                {
+                    loaded = new TokenInfoDictionary();
+                }
+                catch (IOException cause)
+                {
+                    throw new Exception("Cannot load TokenInfoDictionary.", cause);
+                }
+                INSTANCE = loaded;
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoFST.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoFST.cs b/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoFST.cs
new file mode 100644
index 0000000..dffdfbb
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Dict/TokenInfoFST.cs
@@ -0,0 +1,118 @@
+using Lucene.Net.Util.Fst;
+using System.Diagnostics;
+
+namespace Lucene.Net.Analysis.Ja.Dict
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Thin wrapper around an FST with root-arc caching for Japanese.
+    /// <para/>
+    /// Depending upon fasterButMoreRam, either just kana (191 arcs),
+    /// or kana and han (28,607 arcs) are cached. The latter offers
+    /// additional performance at the cost of more RAM.
+    /// </summary>
+    public sealed class TokenInfoFST
+    {
+        private readonly FST<long?> fst;
+
+        // Upper bound (inclusive) of the cached label range; the range always starts at 0x3040.
+        // fasterButMoreRam == false: kana (0x3040-0x30FF), 191 arcs
+        // fasterButMoreRam == true:  kana + han (0x3040-0x9FFF), 28,607 arcs (costs ~1.5MB)
+        private readonly int cacheCeiling;
+
+        // Slot i holds a copy of the root arc for label 0x3040 + i, or null when there is none.
+        private readonly FST.Arc<long?>[] rootCache;
+
+        private readonly long? NO_OUTPUT;
+
+        // LUCENENET specific - made field private
+        // and added public property for reading it.
+        /// <summary>
+        /// The "no output" value taken from the wrapped FST's outputs.
+        /// </summary>
+        public long? NoOutput
+        {
+            get { return NO_OUTPUT; }
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="fst"/> and pre-computes the root-arc cache.
+        /// </summary>
+        /// <param name="fst">FST to wrap.</param>
+        /// <param name="fasterButMoreRam"><c>true</c> to cache labels up to 0x9FFF, <c>false</c> to stop at 0x30FF.</param>
+        public TokenInfoFST(FST<long?> fst, bool fasterButMoreRam)
+        {
+            this.fst = fst;
+            if (fasterButMoreRam)
+            {
+                this.cacheCeiling = 0x9FFF;
+            }
+            else
+            {
+                this.cacheCeiling = 0x30FF;
+            }
+            NO_OUTPUT = fst.Outputs.NoOutput;
+            // Must run last: it reads both this.fst and this.cacheCeiling.
+            rootCache = CacheRootArcs();
+        }
+
+        /// <summary>
+        /// Looks up the root arc of every label from 0x3040 to the cache ceiling and stores
+        /// a copy of each arc that exists.
+        /// </summary>
+        private FST.Arc<long?>[] CacheRootArcs()
+        {
+            int slots = cacheCeiling - 0x3040 + 1;
+            FST.Arc<long?>[] cache = new FST.Arc<long?>[slots];
+            FST.Arc<long?> root = new FST.Arc<long?>();
+            fst.GetFirstArc(root);
+            FST.Arc<long?> scratch = new FST.Arc<long?>();
+            FST.BytesReader reader = fst.GetBytesReader();
+            // TODO: jump to 3040, readNextRealArc to ceiling? (just be careful we don't add bugs)
+            for (int label = 0x3040; label <= cacheCeiling; label++)
+            {
+                if (fst.FindTargetArc(label, root, scratch, reader) == null)
+                {
+                    continue; // no arc for this label, leave the slot null
+                }
+                cache[label - 0x3040] = new FST.Arc<long?>().CopyFrom(scratch);
+            }
+            return cache;
+        }
+
+        /// <summary>
+        /// Finds the arc for <paramref name="ch"/>. When <paramref name="useCache"/> is set and the
+        /// label lies in the cached range, the answer comes from the root cache; otherwise the
+        /// call is forwarded to the wrapped FST.
+        /// </summary>
+        /// <param name="ch">Label to look up.</param>
+        /// <param name="follow">Arc to follow; only used when the lookup is forwarded to the FST.</param>
+        /// <param name="arc">Arc instance that receives the result.</param>
+        /// <param name="useCache">Whether the root cache may be consulted.</param>
+        /// <param name="fstReader">Reader passed on to the wrapped FST.</param>
+        /// <returns><paramref name="arc"/> filled in from the cache, the FST's result, or null if the cache has no entry.</returns>
+        public FST.Arc<long?> FindTargetArc(int ch, FST.Arc<long?> follow, FST.Arc<long?> arc, bool useCache, FST.BytesReader fstReader)
+        {
+            bool servedFromCache = useCache && ch >= 0x3040 && ch <= cacheCeiling;
+            if (!servedFromCache)
+            {
+                return fst.FindTargetArc(ch, follow, arc, fstReader);
+            }
+
+            Debug.Assert(ch != FST.END_LABEL);
+            FST.Arc<long?> cached = rootCache[ch - 0x3040];
+            if (cached == null)
+            {
+                return null;
+            }
+            arc.CopyFrom(cached);
+            return arc;
+        }
+
+        /// <summary>
+        /// Forwards to the wrapped FST's <c>GetFirstArc</c>.
+        /// </summary>
+        public FST.Arc<long?> GetFirstArc(FST.Arc<long?> arc)
+        {
+            return fst.GetFirstArc(arc);
+        }
+
+        /// <summary>
+        /// Forwards to the wrapped FST's <c>GetBytesReader</c>.
+        /// </summary>
+        public FST.BytesReader GetBytesReader()
+        {
+            return fst.GetBytesReader();
+        }
+
+        /// <summary>
+        /// for testing only
+        /// <para/>
+        /// @lucene.internal 
+        /// </summary>
+        internal FST<long?> InternalFST
+        {
+            get { return fst; }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary$buffer.dat
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary$buffer.dat b/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary$buffer.dat
new file mode 100644
index 0000000..16f0a82
Binary files /dev/null and b/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary$buffer.dat differ

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary$posDict.dat
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary$posDict.dat b/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary$posDict.dat
new file mode 100644
index 0000000..e709dcc
Binary files /dev/null and b/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary$posDict.dat differ

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary$targetMap.dat
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary$targetMap.dat b/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary$targetMap.dat
new file mode 100644
index 0000000..e8db0b3
Binary files /dev/null and b/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary$targetMap.dat differ

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary.cs b/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary.cs
new file mode 100644
index 0000000..364576b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Dict/UnknownDictionary.cs
@@ -0,0 +1,100 @@
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja.Dict
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Dictionary for unknown-word handling.
+    /// </summary>
+    public class UnknownDictionary : BinaryDictionary
+    {
+        private readonly CharacterDefinition characterDefinition = CharacterDefinition.GetInstance();
+
+        private UnknownDictionary()
+        {
+        }
+
+        /// <summary>
+        /// Measures the unknown word that starts at <paramref name="offset"/>.
+        /// </summary>
+        /// <param name="text">Character buffer.</param>
+        /// <param name="offset">Index of the first character of the word.</param>
+        /// <param name="len">Maximum number of characters to examine.</param>
+        /// <returns>
+        /// 1 when the first character is not a "group" character; otherwise the length of the
+        /// run of characters sharing the first character's class (at least 1).
+        /// </returns>
+        public virtual int Lookup(char[] text, int offset, int len)
+        {
+            char first = text[offset];
+            if (!characterDefinition.IsGroup(first))
+            {
+                return 1;
+            }
+
+            // Extract unknown word. Characters with the same character class are considered to be part of unknown word
+            byte firstClass = characterDefinition.GetCharacterClass(first);
+            int length = 1;
+            while (length < len && characterDefinition.GetCharacterClass(text[offset + length]) == firstClass)
+            {
+                length++;
+            }
+            return length;
+        }
+
+        /// <summary>
+        /// The character definition used to classify characters.
+        /// </summary>
+        public virtual CharacterDefinition CharacterDefinition
+        {
+            get { return characterDefinition; }
+        }
+
+        /// <summary>Always returns null.</summary>
+        public override string GetReading(int wordId, char[] surface, int off, int len)
+        {
+            return null;
+        }
+
+        /// <summary>Always returns null.</summary>
+        public override string GetInflectionType(int wordId)
+        {
+            return null;
+        }
+
+        /// <summary>Always returns null.</summary>
+        public override string GetInflectionForm(int wordId)
+        {
+            return null;
+        }
+
+        /// <summary>
+        /// Returns the shared <see cref="UnknownDictionary"/> instance.
+        /// </summary>
+        public static UnknownDictionary GetInstance()
+        {
+            return SingletonHolder.INSTANCE;
+        }
+
+        // Holds the singleton; it is created in the static constructor.
+        private class SingletonHolder
+        {
+            internal static readonly UnknownDictionary INSTANCE;
+
+            static SingletonHolder()
+            {
+                UnknownDictionary loaded;
+                try
+                {
+                    loaded = new UnknownDictionary();
+                }
+                catch (IOException cause)
+                {
+                    throw new Exception("Cannot load UnknownDictionary.", cause);
+                }
+                INSTANCE = loaded;
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Dict/UserDictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/UserDictionary.cs b/src/Lucene.Net.Analysis.Kuromoji/Dict/UserDictionary.cs
new file mode 100644
index 0000000..3fb2b09
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Dict/UserDictionary.cs
@@ -0,0 +1,300 @@
+using Lucene.Net.Analysis.Ja.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Fst;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Ja.Dict
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Class for building a User Dictionary.
+    /// This class allows for custom segmentation of phrases.
+    /// </summary>
+    public sealed class UserDictionary : IDictionary
+    {
+        // phrase text -> phrase ID
+        private readonly TokenInfoFST fst;
+
+        // holds wordid, length, length... indexed by phrase ID
+        private readonly int[][] segmentations;
+
+        // holds readings and POS, indexed by wordid
+        private readonly string[] data;
+
+        private static readonly int CUSTOM_DICTIONARY_WORD_ID_OFFSET = 100000000;
+
+        public static readonly int WORD_COST = -100000;
+
+        public static readonly int LEFT_ID = 5;
+
+        public static readonly int RIGHT_ID = 5;
+
+        // Matches a '#' and everything after it up to the end of the line (a comment).
+        private static readonly Regex commentPattern = new Regex(@"#.*$", RegexOptions.Compiled);
+
+        // Matches a run of one or more spaces, so the run can be collapsed to a single space.
+        private static readonly Regex spaceRun = new Regex(@"  *", RegexOptions.Compiled);
+
+        /// <summary>
+        /// Builds a user dictionary from CSV lines of the form
+        /// <c>text, segmentation, readings, POS</c>. Anything after a '#' is treated as a
+        /// comment and blank lines are skipped.
+        /// </summary>
+        /// <param name="reader">Source of the dictionary lines.</param>
+        /// <exception cref="Exception">
+        /// An entry has a different number of segmentations and readings.
+        /// </exception>
+        public UserDictionary(TextReader reader)
+        {
+            string line = null;
+            int wordId = CUSTOM_DICTIONARY_WORD_ID_OFFSET;
+            List<string[]> featureEntries = new List<string[]>();
+
+            // text, segmentation, readings, POS
+            while ((line = reader.ReadLine()) != null)
+            {
+                // Remove comments
+                line = commentPattern.Replace(line, "");
+
+                // Skip empty lines or comment lines
+                if (line.Trim().Length == 0)
+                {
+                    continue;
+                }
+                string[] values = CSVUtil.Parse(line);
+                featureEntries.Add(values);
+            }
+
+            // TODO: should we allow multiple segmentations per input 'phrase'?
+            // the old treemap didn't support this either, and i'm not sure if its needed/useful?
+            featureEntries.Sort(new ComparerAnonymousHelper());
+
+            List<string> data = new List<string>(featureEntries.Count);
+            List<int[]> segmentations = new List<int[]>(featureEntries.Count);
+
+            PositiveInt32Outputs fstOutput = PositiveInt32Outputs.Singleton;
+            Builder<long?> fstBuilder = new Builder<long?>(Lucene.Net.Util.Fst.FST.INPUT_TYPE.BYTE2, fstOutput);
+            Int32sRef scratch = new Int32sRef();
+            long ord = 0;
+
+            foreach (string[] values in featureEntries)
+            {
+                // Collapse runs of spaces, then split the space-separated lists.
+                string[] segmentation = spaceRun.Replace(values[1], " ").Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
+                string[] readings = spaceRun.Replace(values[2], " ").Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
+                string pos = values[3];
+
+                if (segmentation.Length != readings.Length)
+                {
+                    throw new Exception("Illegal user dictionary entry " + values[0] +
+                                               " - the number of segmentations (" + segmentation.Length + ")" +
+                                               " does not match the number of readings (" + readings.Length + ")");
+                }
+
+                int[] wordIdAndLength = new int[segmentation.Length + 1]; // wordId offset, length, length....
+                wordIdAndLength[0] = wordId;
+                for (int i = 0; i < segmentation.Length; i++)
+                {
+                    wordIdAndLength[i + 1] = segmentation[i].Length;
+                    data.Add(readings[i] + Dictionary.INTERNAL_SEPARATOR + pos);
+                    wordId++;
+                }
+                // add mapping to FST
+                string token = values[0];
+                scratch.Grow(token.Length);
+                scratch.Length = token.Length;
+                for (int i = 0; i < token.Length; i++)
+                {
+                    scratch.Int32s[i] = (int)token[i];
+                }
+                fstBuilder.Add(scratch, ord);
+                segmentations.Add(wordIdAndLength);
+                ord++;
+            }
+            this.fst = new TokenInfoFST(fstBuilder.Finish(), false);
+            this.data = data.ToArray(/*new string[data.Count]*/);
+            this.segmentations = segmentations.ToArray(/*new int[segmentations.Count][]*/);
+        }
+
+        // LUCENENET TODO: Make an AnonymousComparer class in Support and
+        // replace all of these classes.
+        // Orders entries by their first field (the phrase text) using ordinal comparison.
+        private class ComparerAnonymousHelper : IComparer<string[]>
+        {
+            public int Compare(string[] left, string[] right)
+            {
+                return left[0].CompareToOrdinal(right[0]);
+            }
+        }
+
+        /// <summary>
+        /// Lookup words in text.
+        /// </summary>
+        /// <param name="chars">Text.</param>
+        /// <param name="off">Offset into text.</param>
+        /// <param name="len">Length of text.</param>
+        /// <returns>Array of {wordId, position, length}.</returns>
+        public int[][] Lookup(char[] chars, int off, int len)
+        {
+            // TODO: can we avoid this treemap/toIndexArray?
+            TreeDictionary<int, int[]> result = new TreeDictionary<int, int[]>(); // index, [length, length...]
+            bool found = false; // true if we found any results
+
+            FST.BytesReader fstReader = fst.GetBytesReader();
+
+            FST.Arc<long?> arc = new FST.Arc<long?>();
+            int end = off + len;
+            for (int startOffset = off; startOffset < end; startOffset++)
+            {
+                arc = fst.GetFirstArc(arc);
+                int output = 0;
+                int remaining = end - startOffset;
+                for (int i = 0; i < remaining; i++)
+                {
+                    int ch = chars[startOffset + i];
+                    // Only the first character of a match starts at the root, so only it may use the cache.
+                    if (fst.FindTargetArc(ch, arc, arc, i == 0, fstReader) == null)
+                    {
+                        break; // continue to next position
+                    }
+                    output += (int)arc.Output;
+                    if (arc.IsFinal)
+                    {
+                        // A later (longer) match from the same start overwrites an earlier one.
+                        int finalOutput = output + (int)arc.NextFinalOutput;
+                        result[startOffset - off] = segmentations[finalOutput];
+                        found = true;
+                    }
+                }
+            }
+
+            return found ? ToIndexArray(result) : EMPTY_RESULT;
+        }
+
+        /// <summary>
+        /// The FST that maps phrase text to phrase ID.
+        /// </summary>
+        public TokenInfoFST FST
+        {
+            get { return fst; }
+        }
+
+        private static readonly int[][] EMPTY_RESULT = new int[0][];
+
+        /// <summary>
+        /// Convert Map of index and wordIdAndLength to array of {wordId, index, length}
+        /// </summary>
+        /// <param name="input">Map from start index to {wordId offset, length, length...}.</param>
+        /// <returns>Array of {wordId, index, length}.</returns>
+        private int[][] ToIndexArray(TreeDictionary<int, int[]> input)
+        {
+            List<int[]> result = new List<int[]>();
+            foreach (int i in input.Keys)
+            {
+                int[] wordIdAndLength = input[i];
+                int wordId = wordIdAndLength[0];
+                // convert length to index
+                int current = i;
+                for (int j = 1; j < wordIdAndLength.Length; j++)
+                { // first entry is wordId offset
+                    int[] token = { wordId + j - 1, current, wordIdAndLength[j] };
+                    result.Add(token);
+                    current += wordIdAndLength[j];
+                }
+            }
+            return result.ToArray(/*new int[result.size()][]*/);
+        }
+
+        /// <summary>
+        /// Returns the {wordId offset, length, length...} array stored for a phrase.
+        /// </summary>
+        /// <param name="phraseID">Index of the phrase.</param>
+        public int[] LookupSegmentation(int phraseID)
+        {
+            return segmentations[phraseID];
+        }
+
+        /// <summary>Always returns <see cref="LEFT_ID"/>.</summary>
+        public int GetLeftId(int wordId)
+        {
+            return LEFT_ID;
+        }
+
+        /// <summary>Always returns <see cref="RIGHT_ID"/>.</summary>
+        public int GetRightId(int wordId)
+        {
+            return RIGHT_ID;
+        }
+
+        /// <summary>Always returns <see cref="WORD_COST"/>.</summary>
+        public int GetWordCost(int wordId)
+        {
+            return WORD_COST;
+        }
+
+        /// <summary>Returns the first stored feature (the reading) of the word.</summary>
+        public string GetReading(int wordId, char[] surface, int off, int len)
+        {
+            return GetFeature(wordId, 0);
+        }
+
+        /// <summary>Returns the second stored feature (the part of speech) of the word.</summary>
+        public string GetPartOfSpeech(int wordId)
+        {
+            return GetFeature(wordId, 1);
+        }
+
+        public string GetBaseForm(int wordId, char[] surface, int off, int len)
+        {
+            return null; // TODO: add support?
+        }
+
+        public string GetPronunciation(int wordId, char[] surface, int off, int len)
+        {
+            return null; // TODO: add support?
+        }
+
+        public string GetInflectionType(int wordId)
+        {
+            return null; // TODO: add support?
+        }
+
+        public string GetInflectionForm(int wordId)
+        {
+            return null; // TODO: add support?
+        }
+
+        /// <summary>
+        /// Splits the packed feature string of a word on <see cref="Dictionary.INTERNAL_SEPARATOR"/>.
+        /// </summary>
+        /// <returns>The features, or null when no string is stored for the word.</returns>
+        private string[] GetAllFeaturesArray(int wordId)
+        {
+            string allFeatures = data[wordId - CUSTOM_DICTIONARY_WORD_ID_OFFSET];
+            if (allFeatures == null)
+            {
+                return null;
+            }
+
+            return allFeatures.Split(new string[] { Dictionary.INTERNAL_SEPARATOR }, StringSplitOptions.RemoveEmptyEntries);
+        }
+
+        /// <summary>
+        /// Returns the selected features of a word joined by commas.
+        /// </summary>
+        /// <param name="wordId">Word ID of token.</param>
+        /// <param name="fields">
+        /// Indexes of the features to return; none means all features. Values are passed
+        /// through <c>CSVUtil.QuoteEscape</c> except when exactly one field is requested.
+        /// </param>
+        /// <returns>The joined features, or null when the word has no features.</returns>
+        private string GetFeature(int wordId, params int[] fields)
+        {
+            string[] allFeatures = GetAllFeaturesArray(wordId);
+            if (allFeatures == null)
+            {
+                return null;
+            }
+            StringBuilder sb = new StringBuilder();
+            if (fields.Length == 0)
+            { // All features
+                foreach (string feature in allFeatures)
+                {
+                    sb.Append(CSVUtil.QuoteEscape(feature)).Append(",");
+                }
+            }
+            else if (fields.Length == 1)
+            { // One feature doesn't need to escape value
+                sb.Append(allFeatures[fields[0]]).Append(",");
+            }
+            else
+            {
+                foreach (int field in fields)
+                {
+                    sb.Append(CSVUtil.QuoteEscape(allFeatures[field])).Append(",");
+                }
+            }
+            // Drop the trailing comma.
+            return sb.Remove(sb.Length - 1, 1).ToString();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/GraphvizFormatter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/GraphvizFormatter.cs b/src/Lucene.Net.Analysis.Kuromoji/GraphvizFormatter.cs
new file mode 100644
index 0000000..bd5233b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/GraphvizFormatter.cs
@@ -0,0 +1,197 @@
+using Lucene.Net.Analysis.Ja.Dict;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    // TODO: would be nice to show the 2nd best path in a different
+    // color...
+
+    /// <summary>
+    /// Outputs the dot (graphviz) string for the viterbi lattice.
+    /// </summary>
+    public class GraphvizFormatter
+    {
+        private static readonly string BOS_LABEL = "BOS";
+
+        private static readonly string EOS_LABEL = "EOS";
+
+        private static readonly string FONT_NAME = "Helvetica";
+
+        private readonly ConnectionCosts costs;
+
+        // Arcs of the best path found by the most recent backtrace:
+        // source node ID -> destination node ID.
+        private readonly IDictionary<string, string> bestPathMap;
+
+        // Accumulates the dot text: header (constructor), one fragment per
+        // OnBacktrace call, trailer (Finish).
+        private readonly StringBuilder sb = new StringBuilder();
+
+        /// <summary>
+        /// Creates a formatter and immediately writes the graph header, the invisible
+        /// <c>init</c> node and the BOS arc from <c>init</c> to node <c>0.0</c>.
+        /// </summary>
+        /// <param name="costs">Connection costs used to compute the second cost shown on each arc label.</param>
+        public GraphvizFormatter(ConnectionCosts costs)
+        {
+            this.costs = costs;
+            this.bestPathMap = new Dictionary<string, string>();
+            sb.Append(FormatHeader());
+            sb.Append("  init [style=invis]\n");
+            sb.Append("  init -> 0.0 [label=\"" + BOS_LABEL + "\"]\n");
+        }
+
+        /// <summary>
+        /// Appends the closing brace of the graph and returns everything written so far.
+        /// </summary>
+        /// <remarks>
+        /// The trailer is appended on every call, so this is meant to be called once.
+        /// </remarks>
+        /// <returns>The complete dot document.</returns>
+        public virtual string Finish()
+        {
+            sb.Append(FormatTrailer());
+            return sb.ToString();
+        }
+
+        // Backtraces another incremental fragment: records the best path for the
+        // fragment, appends its nodes and arcs, and, for the final fragment, the
+        // invisible "fini" node with its EOS arc.
+        internal void OnBacktrace(JapaneseTokenizer tok, WrappedPositionArray positions, int lastBackTracePos, Position endPosData, int fromIDX, char[] fragment, bool isEnd)
+        {
+            SetBestPathMap(positions, lastBackTracePos, endPosData, fromIDX);
+            sb.Append(FormatNodes(tok, positions, lastBackTracePos, endPosData, fragment));
+            if (isEnd)
+            {
+                sb.Append("  fini [style=invis]\n");
+                sb.Append("  ");
+                sb.Append(GetNodeID(endPosData.pos, fromIDX));
+                sb.Append(" -> fini [label=\"" + EOS_LABEL + "\"]");
+            }
+        }
+
+        // Records which arcs make up the best path, walking the back pointers from
+        // (endPosData.pos, fromIDX) until startPos is reached.
+        private void SetBestPathMap(WrappedPositionArray positions, int startPos, Position endPosData, int fromIDX)
+        {
+            bestPathMap.Clear();
+
+            int pos = endPosData.pos;
+            int bestIDX = fromIDX;
+            while (pos > startPos)
+            {
+                Position posData = positions.Get(pos);
+
+                int backPos = posData.backPos[bestIDX];
+                int backIDX = posData.backIndex[bestIDX];
+
+                string toNodeID = GetNodeID(pos, bestIDX);
+                string fromNodeID = GetNodeID(backPos, backIDX);
+
+                // Each node may appear at most once as a source and once as a destination.
+                Debug.Assert(!bestPathMap.ContainsKey(fromNodeID));
+                Debug.Assert(!bestPathMap.Values.Contains(toNodeID));
+                bestPathMap[fromNodeID] = toNodeID;
+                pos = backPos;
+                bestIDX = backIDX;
+            }
+        }
+
+        // Builds the dot text for every node and arc between startPos (exclusive)
+        // and endPosData.pos (inclusive). Arcs on the best path are highlighted.
+        private string FormatNodes(JapaneseTokenizer tok, WrappedPositionArray positions, int startPos, Position endPosData, char[] fragment)
+        {
+            StringBuilder dot = new StringBuilder();
+            // Output nodes
+            for (int pos = startPos + 1; pos <= endPosData.pos; pos++)
+            {
+                Position posData = positions.Get(pos);
+                for (int idx = 0; idx < posData.count; idx++)
+                {
+                    dot.Append("  ");
+                    dot.Append(GetNodeID(pos, idx));
+                    dot.Append(" [label=\"");
+                    dot.Append(FormatInt(pos));
+                    dot.Append(": ");
+                    dot.Append(posData.lastRightID[idx]);
+                    dot.Append("\"]\n");
+                }
+            }
+
+            // Output arcs
+            for (int pos = endPosData.pos; pos > startPos; pos--)
+            {
+                Position posData = positions.Get(pos);
+                for (int idx = 0; idx < posData.count; idx++)
+                {
+                    Position backPosData = positions.Get(posData.backPos[idx]);
+                    string toNodeID = GetNodeID(pos, idx);
+                    string fromNodeID = GetNodeID(posData.backPos[idx], posData.backIndex[idx]);
+
+                    dot.Append("  ");
+                    dot.Append(fromNodeID);
+                    dot.Append(" -> ");
+                    dot.Append(toNodeID);
+
+                    string attrs;
+                    string path;
+                    bestPathMap.TryGetValue(fromNodeID, out path);
+                    if (toNodeID.Equals(path))
+                    {
+                        // This arc is on best path
+                        attrs = " color=\"#40e050\" fontcolor=\"#40a050\" penwidth=3 fontsize=20";
+                    }
+                    else
+                    {
+                        attrs = "";
+                    }
+
+                    IDictionary dict = tok.GetDict(posData.backType[idx]);
+                    int wordCost = dict.GetWordCost(posData.backID[idx]);
+                    int bgCost = costs.Get(backPosData.lastRightID[posData.backIndex[idx]],
+                                                 dict.GetLeftId(posData.backID[idx]));
+
+                    string surfaceForm = new string(fragment,
+                                                          posData.backPos[idx] - startPos,
+                                                          pos - posData.backPos[idx]);
+
+                    // Label layout: <surface> <wordCost><sign><bgCost>
+                    dot.Append(" [label=\"");
+                    // The surface form is arbitrary input text; a raw quote or backslash
+                    // would terminate or corrupt the quoted label.
+                    dot.Append(EscapeLabel(surfaceForm));
+                    dot.Append(' ');
+                    dot.Append(FormatInt(wordCost));
+                    if (bgCost >= 0)
+                    {
+                        // Negative values carry their own sign.
+                        dot.Append('+');
+                    }
+                    dot.Append(FormatInt(bgCost));
+                    dot.Append("\"");
+                    dot.Append(attrs);
+                    dot.Append("]\n");
+                }
+            }
+            return dot.ToString();
+        }
+
+        // Builds the opening of the digraph together with the default graph, edge
+        // and node attributes.
+        private static string FormatHeader()
+        {
+            StringBuilder header = new StringBuilder();
+            header.Append("digraph viterbi {\n");
+            header.Append("  graph [ fontsize=30 labelloc=\"t\" label=\"\" splines=true overlap=false rankdir = \"LR\"];\n");
+            //header.Append("  // A2 paper size\n");
+            //header.Append("  size = \"34.4,16.5\";\n");
+            //header.Append("  // try to fill paper\n");
+            //header.Append("  ratio = fill;\n");
+            header.Append("  edge [ fontname=\"" + FONT_NAME + "\" fontcolor=\"red\" color=\"#606060\" ]\n");
+            header.Append("  node [ style=\"filled\" fillcolor=\"#e8e8f0\" shape=\"Mrecord\" fontname=\"" + FONT_NAME + "\" ]\n");
+
+            return header.ToString();
+        }
+
+        // Closes the digraph opened by FormatHeader.
+        private static string FormatTrailer()
+        {
+            return "}";
+        }
+
+        // Node IDs have the form "<position>.<index>", e.g. "0.0".
+        private static string GetNodeID(int pos, int idx)
+        {
+            return FormatInt(pos) + "." + FormatInt(idx);
+        }
+
+        // Formats an integer independently of the current culture, so that the
+        // generated dot text (in particular the sign of negative costs) is the same
+        // on every machine.
+        private static string FormatInt(int value)
+        {
+            return value.ToString(global::System.Globalization.CultureInfo.InvariantCulture);
+        }
+
+        // Escapes backslashes and double quotes so that the text can be placed
+        // inside a double-quoted dot label.
+        private static string EscapeLabel(string text)
+        {
+            return text.Replace("\\", "\\\\").Replace("\"", "\\\"");
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/JapaneseAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseAnalyzer.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseAnalyzer.cs
new file mode 100644
index 0000000..dccf5ad
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseAnalyzer.cs
@@ -0,0 +1,103 @@
+using Lucene.Net.Analysis.Cjk;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Ja.Dict;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Analyzer for Japanese that uses morphological analysis.
+    /// </summary>
+    /// <seealso cref="JapaneseTokenizer"/>
+    public class JapaneseAnalyzer : StopwordAnalyzerBase
+    {
+        // Tokenizer mode passed to every JapaneseTokenizer created by this analyzer.
+        private readonly JapaneseTokenizerMode mode;
+        // Tag set handed to JapanesePartOfSpeechStopFilter in CreateComponents.
+        private readonly ISet<string> stoptags;
+        // Optional user dictionary passed to the tokenizer; null in the default constructor.
+        private readonly UserDictionary userDict;
+
+        /// <summary>
+        /// Creates an analyzer with no user dictionary, <see cref="JapaneseTokenizer.DEFAULT_MODE"/>,
+        /// and the default stop word and stop tag sets.
+        /// </summary>
+        /// <param name="matchVersion">Version passed to the base class.</param>
+        public JapaneseAnalyzer(LuceneVersion matchVersion)
+            : this(matchVersion, null, JapaneseTokenizer.DEFAULT_MODE, DefaultSetHolder.DEFAULT_STOP_SET, DefaultSetHolder.DEFAULT_STOP_TAGS)
+        {
+        }
+
+        /// <summary>
+        /// Creates an analyzer with an explicit configuration.
+        /// </summary>
+        /// <param name="matchVersion">Version passed to the base class.</param>
+        /// <param name="userDict">User dictionary handed to the tokenizer; the default constructor passes <c>null</c>.</param>
+        /// <param name="mode">Tokenizer mode.</param>
+        /// <param name="stopwords">Stop words, passed to the base class.</param>
+        /// <param name="stoptags">Tags handed to <see cref="JapanesePartOfSpeechStopFilter"/>.</param>
+        public JapaneseAnalyzer(LuceneVersion matchVersion, UserDictionary userDict, JapaneseTokenizerMode mode, CharArraySet stopwords, ISet<string> stoptags)
+            : base(matchVersion, stopwords)
+        {
+            this.userDict = userDict;
+            this.mode = mode;
+            this.stoptags = stoptags;
+        }
+
+        /// <summary>
+        /// Returns the default stop word set (loaded from <c>stopwords.txt</c>).
+        /// </summary>
+        public static CharArraySet GetDefaultStopSet()
+        {
+            return DefaultSetHolder.DEFAULT_STOP_SET;
+        }
+
+        /// <summary>
+        /// Returns the default stop tag set (loaded from <c>stoptags.txt</c>).
+        /// The shared set instance itself is returned, not a copy.
+        /// </summary>
+        public static ISet<string> GetDefaultStopTags()
+        {
+            return DefaultSetHolder.DEFAULT_STOP_TAGS;
+        }
+
+        /// <summary>
+        /// Holds DEFAULT_STOP_SET and DEFAULT_STOP_TAGS. Both are loaded by the static
+        /// constructor, i.e. lazily, the first time the outer class accesses this holder.
+        /// </summary>
+        private static class DefaultSetHolder
+        {
+            internal static readonly CharArraySet DEFAULT_STOP_SET;
+            internal static readonly ISet<string> DEFAULT_STOP_TAGS;
+
+            static DefaultSetHolder()
+            {
+                try
+                {
+                    DEFAULT_STOP_SET = LoadStopwordSet(true, typeof(JapaneseAnalyzer), "stopwords.txt", "#");  // ignore case
+                    // The tags are loaded with the first argument set to false (unlike the
+                    // stop words above) and then copied into a plain string set.
+                    CharArraySet tagset = LoadStopwordSet(false, typeof(JapaneseAnalyzer), "stoptags.txt", "#");
+                    DEFAULT_STOP_TAGS = new HashSet<string>();
+                    foreach (string element in tagset)
+                    {
+                        DEFAULT_STOP_TAGS.Add(element);
+                    }
+                }
+                catch (IOException ex)
+                {
+                    // the default sets should always be present as they are part of the distribution
+                    throw new Exception("Unable to load default stopword or stoptag set", ex);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Builds the analysis chain: <see cref="JapaneseTokenizer"/>, then
+        /// <see cref="JapaneseBaseFormFilter"/>, <see cref="JapanesePartOfSpeechStopFilter"/>,
+        /// <see cref="CJKWidthFilter"/>, <see cref="StopFilter"/>,
+        /// <see cref="JapaneseKatakanaStemFilter"/> and <see cref="LowerCaseFilter"/>, in that order.
+        /// </summary>
+        /// <param name="fieldName">Field name; not used by this implementation.</param>
+        /// <param name="reader">Source text for the tokenizer.</param>
+        /// <returns>The tokenizer together with the last filter of the chain.</returns>
+        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+        {
+            // NOTE(review): the literal `true` is presumably the tokenizer's
+            // discard-punctuation flag; confirm against the JapaneseTokenizer constructor.
+            Tokenizer tokenizer = new JapaneseTokenizer(reader, userDict, true, mode);
+            TokenStream stream = new JapaneseBaseFormFilter(tokenizer);
+            stream = new JapanesePartOfSpeechStopFilter(m_matchVersion, stream, stoptags);
+            stream = new CJKWidthFilter(stream);
+            stream = new StopFilter(m_matchVersion, stream, m_stopwords);
+            stream = new JapaneseKatakanaStemFilter(stream);
+            stream = new LowerCaseFilter(m_matchVersion, stream);
+            return new TokenStreamComponents(tokenizer, stream);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/JapaneseBaseFormFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseBaseFormFilter.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseBaseFormFilter.cs
new file mode 100644
index 0000000..2117737
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseBaseFormFilter.cs
@@ -0,0 +1,65 @@
+using Lucene.Net.Analysis.Ja.TokenAttributes;
+using Lucene.Net.Analysis.TokenAttributes;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Replaces term text with the <see cref="IBaseFormAttribute"/>.
+    /// <para/>
+    /// This acts as a lemmatizer for verbs and adjectives.
+    /// To prevent terms from being stemmed use an instance of
+    /// <see cref="SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+    /// the <see cref="IKeywordAttribute"/> before this <see cref="TokenStream"/>.
+    /// </summary>
+    public sealed class JapaneseBaseFormFilter : TokenFilter
+    {
+        private readonly ICharTermAttribute termAtt;
+        private readonly IBaseFormAttribute basicFormAtt;
+        private readonly IKeywordAttribute keywordAtt;
+
+        /// <summary>
+        /// Wraps <paramref name="input"/> and registers the term, base form and
+        /// keyword attributes this filter works with.
+        /// </summary>
+        /// <param name="input">The token stream to filter.</param>
+        public JapaneseBaseFormFilter(TokenStream input)
+            : base(input)
+        {
+            termAtt = AddAttribute<ICharTermAttribute>();
+            basicFormAtt = AddAttribute<IBaseFormAttribute>();
+            keywordAtt = AddAttribute<IKeywordAttribute>();
+        }
+
+        /// <summary>
+        /// Advances the wrapped stream and, unless the token is marked as a keyword,
+        /// replaces its term text with the base form when one is available.
+        /// </summary>
+        /// <returns><c>true</c> if a token was produced; <c>false</c> at end of stream.</returns>
+        public override bool IncrementToken()
+        {
+            if (!m_input.IncrementToken())
+            {
+                return false;
+            }
+
+            // Keyword tokens pass through untouched.
+            if (keywordAtt.IsKeyword)
+            {
+                return true;
+            }
+
+            string lemma = basicFormAtt.GetBaseForm();
+            if (lemma != null)
+            {
+                termAtt.SetEmpty().Append(lemma);
+            }
+            return true;
+        }
+    }
+}

[06/13] lucenenet git commit: Ported Lucene.Net.Analysis.Kuromoji + tests

Posted by ni...@apache.org.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/Lucene.Net.Tests.Analysis.Kuromoji.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/Lucene.Net.Tests.Analysis.Kuromoji.csproj b/src/Lucene.Net.Tests.Analysis.Kuromoji/Lucene.Net.Tests.Analysis.Kuromoji.csproj
new file mode 100644
index 0000000..e2cbbb8
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/Lucene.Net.Tests.Analysis.Kuromoji.csproj
@@ -0,0 +1,106 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{34A2BCE8-1351-43BD-A365-F50E7C0B2C49}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net.Analysis.Ja</RootNamespace>
+    <AssemblyName>Lucene.Net.Tests.Analysis.Kuromoji</AssemblyName>
+    <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="System.Xml.Linq" />
+    <Reference Include="System.Data.DataSetExtensions" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Net.Http" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Dict\TestTokenInfoDictionary.cs" />
+    <Compile Include="Dict\UserDictionaryTest.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="StringMockResourceLoader.cs" />
+    <Compile Include="Support\TestApiConsistency.cs" />
+    <Compile Include="Support\TestExceptionSerialization.cs" />
+    <Compile Include="TestExtendedMode.cs" />
+    <Compile Include="TestJapaneseAnalyzer.cs" />
+    <Compile Include="TestJapaneseBaseFormFilter.cs" />
+    <Compile Include="TestJapaneseBaseFormFilterFactory.cs" />
+    <Compile Include="TestJapaneseIterationMarkCharFilter.cs" />
+    <Compile Include="TestJapaneseIterationMarkCharFilterFactory.cs" />
+    <Compile Include="TestJapaneseKatakanaStemFilter.cs" />
+    <Compile Include="TestJapaneseKatakanaStemFilterFactory.cs" />
+    <Compile Include="TestJapanesePartOfSpeechStopFilterFactory.cs" />
+    <Compile Include="TestJapaneseReadingFormFilter.cs" />
+    <Compile Include="TestJapaneseReadingFormFilterFactory.cs" />
+    <Compile Include="TestJapaneseTokenizer.cs" />
+    <Compile Include="TestJapaneseTokenizerFactory.cs" />
+    <Compile Include="TestSearchMode.cs" />
+    <Compile Include="Tools\UnknownDictionaryTest.cs" />
+    <Compile Include="Util\TestToStringUtil.cs" />
+    <Compile Include="..\CommonAssemblyInfo.cs">
+      <Link>Properties\CommonAssemblyInfo.cs</Link>
+    </Compile>
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
+      <Project>{4ADD0BBC-B900-4715-9526-D871DE8EEA64}</Project>
+      <Name>Lucene.Net.Analysis.Common</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net.Analysis.Kuromoji\Lucene.Net.Analysis.Kuromoji.csproj">
+      <Project>{8408625A-2508-46D5-8519-045183C43724}</Project>
+      <Name>Lucene.Net.Analysis.Kuromoji</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net.TestFramework\Lucene.Net.TestFramework.csproj">
+      <Project>{B2C0D749-CE34-4F62-A15E-00CB2FF5DDB3}</Project>
+      <Name>Lucene.Net.TestFramework</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj">
+      <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <EmbeddedResource Include="bocchan.utf-8" />
+    <None Include="Lucene.Net.Tests.Analysis.Kuromoji.project.json" />
+  </ItemGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <EmbeddedResource Include="search-segmentation-tests.txt" />
+    <EmbeddedResource Include="userdict.txt" />
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/Lucene.Net.Tests.Analysis.Kuromoji.project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/Lucene.Net.Tests.Analysis.Kuromoji.project.json b/src/Lucene.Net.Tests.Analysis.Kuromoji/Lucene.Net.Tests.Analysis.Kuromoji.project.json
new file mode 100644
index 0000000..8c631ab
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/Lucene.Net.Tests.Analysis.Kuromoji.project.json
@@ -0,0 +1,11 @@
+{
+  "runtimes": {
+    "win": {}
+  },
+  "dependencies": {
+    "NUnit": "3.5.0"
+  },
+  "frameworks": {
+    "net451": {}
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/Lucene.Net.Tests.Analysis.Kuromoji.xproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/Lucene.Net.Tests.Analysis.Kuromoji.xproj b/src/Lucene.Net.Tests.Analysis.Kuromoji/Lucene.Net.Tests.Analysis.Kuromoji.xproj
new file mode 100644
index 0000000..f35a3f2
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/Lucene.Net.Tests.Analysis.Kuromoji.xproj
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+<Project ToolsVersion="14.0.25420" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0.25420</VisualStudioVersion>
+    <VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
+  </PropertyGroup>
+  <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.Props" Condition="'$(VSToolsPath)' != ''" />
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>f82f0f31-09e7-48fb-b5ff-f3a84627a307</ProjectGuid>
+    <RootNamespace>Lucene.Net.Analysis.Ja</RootNamespace>
+    <BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">.\obj</BaseIntermediateOutputPath>
+    <OutputPath Condition="'$(OutputPath)'=='' ">.\bin\</OutputPath>
+  </PropertyGroup>
+  <PropertyGroup>
+    <SchemaVersion>2.0</SchemaVersion>
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{82a7f48d-3b50-4b1e-b82e-3ada8210c358}" />
+  </ItemGroup>
+  <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.targets" Condition="'$(VSToolsPath)' != ''" />
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/Properties/AssemblyInfo.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..4c466cf
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/Properties/AssemblyInfo.cs
@@ -0,0 +1,38 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Tests.Analysis.Kuromoji")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("34a2bce8-1351-43bd-a365-f50e7c0b2c49")]
+
+// NOTE: Version information is in CommonAssemblyInfo.cs

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/StringMockResourceLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/StringMockResourceLoader.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/StringMockResourceLoader.cs
new file mode 100644
index 0000000..49819cd
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/StringMockResourceLoader.cs
@@ -0,0 +1,67 @@
+using Lucene.Net.Analysis.Util;
+using System;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>Fake resource loader for tests: works if you want to fake reading a single file</summary>
+    internal class StringMockResourceLoader : IResourceLoader
+    {
+        // Content served for every resource name.
+        private readonly string text;
+
+        /// <summary>
+        /// Creates a loader whose every resource consists of <paramref name="text"/>.
+        /// </summary>
+        /// <param name="text">The fake file content.</param>
+        public StringMockResourceLoader(string text)
+        {
+            this.text = text;
+        }
+
+        /// <summary>
+        /// Resolves a type by name.
+        /// </summary>
+        /// <param name="cname">Type name as understood by <see cref="Type.GetType(string, bool)"/>.</param>
+        /// <returns>The resolved type; never <c>null</c>.</returns>
+        /// <exception cref="Exception">The type cannot be loaded; the cause is the inner exception.</exception>
+        public virtual Type FindType(string cname)
+        {
+            try
+            {
+                //return Class.forName(cname).asSubclass(expectedType);
+                // throwOnError = true: the single-argument overload returns null for an
+                // unknown name, which would skip the error reporting below and only
+                // fail later inside NewInstance with a misleading message.
+                return Type.GetType(cname, true);
+            }
+            catch (Exception e)
+            {
+                throw new Exception("Cannot load class: " + cname, e);
+            }
+        }
+
+        /// <summary>
+        /// Resolves <paramref name="cname"/> with <see cref="FindType"/> and creates an
+        /// instance through its parameterless constructor.
+        /// </summary>
+        /// <typeparam name="T">Type the new instance is cast to.</typeparam>
+        /// <param name="cname">Name of the type to instantiate.</param>
+        /// <returns>The new instance.</returns>
+        /// <exception cref="Exception">The type cannot be loaded, instantiated or cast to <typeparamref name="T"/>.</exception>
+        public virtual T NewInstance<T>(string cname)
+        {
+            Type clazz = FindType(cname);
+            try
+            {
+                //return clazz.NewInstance();
+                return (T)Activator.CreateInstance(clazz);
+            }
+            catch (Exception e)
+            {
+                throw new Exception("Cannot create instance: " + cname, e);
+            }
+        }
+
+        /// <summary>
+        /// Returns a new in-memory stream over the UTF-8 bytes of the configured text.
+        /// </summary>
+        /// <param name="resource">Ignored: the same content is returned for any name.</param>
+        public virtual Stream OpenResource(string resource)
+        {
+            return new MemoryStream(Encoding.UTF8.GetBytes(text));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/Support/TestApiConsistency.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/Support/TestApiConsistency.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/Support/TestApiConsistency.cs
new file mode 100644
index 0000000..de33cf3
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/Support/TestApiConsistency.cs
@@ -0,0 +1,150 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using Lucene.Net.Attributes;
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Ja.Support
+{
+    /// <summary>
+    /// LUCENENET specific tests for ensuring API conventions are followed
+    /// </summary>
+    /// <remarks>
+    /// Every test simply forwards to the same-named check on <see cref="ApiScanTestBase"/>,
+    /// passing <see cref="Lucene.Net.Analysis.Ja.GraphvizFormatter"/> (supplied through the
+    /// <c>TestCase</c> attribute) as the <c>typeFromTargetAssembly</c> argument.
+    /// </remarks>
+    public class TestApiConsistency : ApiScanTestBase
+    {
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestProtectedFieldNames(Type typeFromTargetAssembly)
+        {
+            base.TestProtectedFieldNames(typeFromTargetAssembly);
+        }
+
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestPrivateFieldNames(Type typeFromTargetAssembly)
+        {
+            base.TestPrivateFieldNames(typeFromTargetAssembly);
+        }
+
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestPublicFields(Type typeFromTargetAssembly)
+        {
+            base.TestPublicFields(typeFromTargetAssembly);
+        }
+
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestMethodParameterNames(Type typeFromTargetAssembly)
+        {
+            base.TestMethodParameterNames(typeFromTargetAssembly);
+        }
+
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestInterfaceNames(Type typeFromTargetAssembly)
+        {
+            base.TestInterfaceNames(typeFromTargetAssembly);
+        }
+
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestClassNames(Type typeFromTargetAssembly)
+        {
+            base.TestClassNames(typeFromTargetAssembly);
+        }
+
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestForPropertiesWithNoGetter(Type typeFromTargetAssembly)
+        {
+            base.TestForPropertiesWithNoGetter(typeFromTargetAssembly);
+        }
+
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestForPropertiesThatReturnArray(Type typeFromTargetAssembly)
+        {
+            base.TestForPropertiesThatReturnArray(typeFromTargetAssembly);
+        }
+
+        // This check is compiled only when the NETSTANDARD symbol is not defined.
+#if !NETSTANDARD
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestForMethodsThatReturnWritableArray(Type typeFromTargetAssembly)
+        {
+            base.TestForMethodsThatReturnWritableArray(typeFromTargetAssembly);
+        }
+#endif
+
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestForPublicMembersContainingComparer(Type typeFromTargetAssembly)
+        {
+            base.TestForPublicMembersContainingComparer(typeFromTargetAssembly);
+        }
+
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestForPublicMembersNamedSize(Type typeFromTargetAssembly)
+        {
+            base.TestForPublicMembersNamedSize(typeFromTargetAssembly);
+        }
+
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestForPublicMembersContainingNonNetNumeric(Type typeFromTargetAssembly)
+        {
+            base.TestForPublicMembersContainingNonNetNumeric(typeFromTargetAssembly);
+        }
+
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestForTypesContainingNonNetNumeric(Type typeFromTargetAssembly)
+        {
+            base.TestForTypesContainingNonNetNumeric(typeFromTargetAssembly);
+        }
+
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestForPublicMembersWithNullableEnum(Type typeFromTargetAssembly)
+        {
+            base.TestForPublicMembersWithNullableEnum(typeFromTargetAssembly);
+        }
+
+        // LUCENENET NOTE: This test is only for identifying members who were changed from
+        // ICollection, IList or ISet to IEnumerable during the port (that should be changed back)
+        //[Test, LuceneNetSpecific]
+        //[TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        //public override void TestForMembersAcceptingOrReturningIEnumerable(Type typeFromTargetAssembly)
+        //{
+        //    base.TestForMembersAcceptingOrReturningIEnumerable(typeFromTargetAssembly);
+        //}
+
+        [Test, LuceneNetSpecific]
+        [TestCase(typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter))]
+        public override void TestForMembersAcceptingOrReturningListOrDictionary(Type typeFromTargetAssembly)
+        {
+            base.TestForMembersAcceptingOrReturningListOrDictionary(typeFromTargetAssembly);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/Support/TestExceptionSerialization.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/Support/TestExceptionSerialization.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/Support/TestExceptionSerialization.cs
new file mode 100644
index 0000000..7dcea20
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/Support/TestExceptionSerialization.cs
@@ -0,0 +1,54 @@
+#if FEATURE_SERIALIZABLE
+using Lucene.Net.Attributes;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Lucene.Net.Support
+{
+    /*
+    * Licensed to the Apache Software Foundation (ASF) under one or more
+    * contributor license agreements.  See the NOTICE file distributed with
+    * this work for additional information regarding copyright ownership.
+    * The ASF licenses this file to You under the Apache License, Version 2.0
+    * (the "License"); you may not use this file except in compliance with
+    * the License.  You may obtain a copy of the License at
+    *
+    *     http://www.apache.org/licenses/LICENSE-2.0
+    *
+    * Unless required by applicable law or agreed to in writing, software
+    * distributed under the License is distributed on an "AS IS" BASIS,
+    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    * See the License for the specific language governing permissions and
+    * limitations under the License.
+    */
+
+    /// <summary>
+    /// Checks that the exception types found in the assembly containing
+    /// <see cref="Lucene.Net.Analysis.Ja.GraphvizFormatter"/> can be serialized.
+    /// </summary>
+    [TestFixture]
+    public class TestExceptionSerialization : ExceptionSerializationTestBase
+    {
+        /// <summary>
+        /// Value source for the serialization test: every type in the target assembly that is
+        /// assignable to <see cref="Exception"/>, or just <see cref="Exception"/> itself when
+        /// the assembly declares none.
+        /// </summary>
+        public static IEnumerable<object> ExceptionTestData
+        {
+            get
+            {
+                Type anchor = typeof(Lucene.Net.Analysis.Ja.GraphvizFormatter);
+                IEnumerable<object> candidates = anchor.Assembly
+                    .GetTypes()
+                    .Where(candidate => typeof(Exception).IsAssignableFrom(candidate))
+                    .Cast<object>();
+
+                if (candidates.Any())
+                {
+                    return candidates;
+                }
+
+                // If the assembly has no exceptions, just provide Exception so the test will pass
+                return new Type[] { typeof(Exception) };
+            }
+        }
+
+        /// <summary>
+        /// Instantiates <paramref name="luceneException"/> and asserts that the instance serializes.
+        /// </summary>
+        [Test, LuceneNetSpecific]
+        public void AllExceptionsInLuceneNamespaceCanSerialize([ValueSource("ExceptionTestData")]Type luceneException)
+        {
+            var candidate = TryInstantiate(luceneException);
+            var serializable = TypeCanSerialize(candidate);
+            string failureMessage = string.Format("Unable to serialize {0}", luceneException.FullName);
+            Assert.That(serializable, failureMessage);
+        }
+    }
+}
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/TestExtendedMode.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/TestExtendedMode.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestExtendedMode.cs
new file mode 100644
index 0000000..f4db3d9
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/TestExtendedMode.cs
@@ -0,0 +1,82 @@
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Ja
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests for <see cref="JapaneseTokenizer"/> running in
+    /// <see cref="JapaneseTokenizerMode.EXTENDED"/> mode.
+    /// </summary>
+    public class TestExtendedMode : BaseTokenStreamTestCase
+    {
+        // Analyzer whose only component is a JapaneseTokenizer in EXTENDED mode.
+        // NOTE(review): the null / true arguments are presumably "no user dictionary" and
+        // "discard punctuation" - confirm against the JapaneseTokenizer constructor.
+        private readonly Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizerMode.EXTENDED);
+            return new TokenStreamComponents(tokenizer, tokenizer);
+        });
+
+        /// <summary>simple test for supplementary characters</summary>
+        [Test]
+        public void TestSurrogates()
+        {
+            AssertAnalyzesTo(analyzer, "𩬅艱鍟䇹愯瀛",
+          new String[] { "𩬅", "艱", "鍟", "䇹", "愯", "瀛" });
+        }
+
+        /// <summary>random test ensuring we don't ever split supplementaries</summary>
+        [Test]
+        public void TestSurrogates2()
+        {
+            int numIterations = AtLeast(1000);
+            for (int i = 0; i < numIterations; i++)
+            {
+                String s = TestUtil.RandomUnicodeString(Random(), 100);
+                TokenStream ts = analyzer.GetTokenStream("foo", s);
+                try
+                {
+                    ICharTermAttribute termAtt = ts.AddAttribute<ICharTermAttribute>();
+                    ts.Reset();
+                    while (ts.IncrementToken())
+                    {
+                        // Every emitted term must be well-formed UTF-16.
+                        assertTrue(UnicodeUtil.ValidUTF16String(termAtt));
+                    }
+                    ts.End();
+                }
+                finally
+                {
+                    // Always release the stream, even when an assertion above fails.
+                    IOUtils.DisposeWhileHandlingException(ts);
+                }
+            }
+        }
+
+        /// <summary>blast some random strings through the analyzer</summary>
+        [Test]
+        public void TestRandomStrings()
+        {
+            Random random = Random();
+            CheckRandomData(random, analyzer, 1000 * RANDOM_MULTIPLIER);
+        }
+
+        /// <summary>blast some random large strings through the analyzer</summary>
+        // The [Test] attribute was missing here: NUnit does not discover tests by name,
+        // so without it this method was silently never executed.
+        [Test]
+        public void TestRandomHugeStrings()
+        {
+            Random random = Random();
+            CheckRandomData(random, analyzer, 100 * RANDOM_MULTIPLIER, 8192);
+        }
+    }
+}

[08/13] lucenenet git commit: Ported Lucene.Net.Analysis.Kuromoji + tests

Posted by ni...@apache.org.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/BaseFormAttributeImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/BaseFormAttributeImpl.cs b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/BaseFormAttributeImpl.cs
new file mode 100644
index 0000000..7121c73
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/BaseFormAttributeImpl.cs
@@ -0,0 +1,55 @@
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Ja.TokenAttributes
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Attribute for <see cref="Token.GetBaseForm()"/>.
+    /// </summary>
+    public class BaseFormAttribute : Attribute, IBaseFormAttribute
+    {
+        // Token the base form is read from; null until SetToken is called and again after Clear().
+        private Token token;
+
+        /// <summary>
+        /// Returns the base form of the current token, or <c>null</c> when no token is set.
+        /// </summary>
+        public virtual string GetBaseForm()
+        {
+            return token == null ? null : token.GetBaseForm();
+        }
+
+        /// <summary>Sets the token whose base form this attribute exposes.</summary>
+        /// <param name="token">The token to read from; may be <c>null</c>.</param>
+        public virtual void SetToken(Token token)
+        {
+            this.token = token;
+        }
+
+        /// <summary>Drops the reference to the current token.</summary>
+        public override void Clear()
+        {
+            token = null;
+        }
+
+        /// <summary>Copies the current token reference into <paramref name="target"/>.</summary>
+        /// <param name="target">Must be a <see cref="BaseFormAttribute"/>; the cast fails otherwise.</param>
+        public override void CopyTo(IAttribute target)
+        {
+            BaseFormAttribute t = (BaseFormAttribute)target;
+            t.SetToken(token);
+        }
+
+        /// <summary>Reports the base form to <paramref name="reflector"/> under the key "baseForm".</summary>
+        public override void ReflectWith(IAttributeReflector reflector)
+        {
+            // Report under the attribute interface, exactly as InflectionAttribute,
+            // PartOfSpeechAttribute and ReadingAttribute do; the previous code passed the
+            // implementation class (typeof(BaseFormAttribute)) instead.
+            reflector.Reflect<IBaseFormAttribute>("baseForm", GetBaseForm());
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/InflectionAttribute.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/InflectionAttribute.cs b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/InflectionAttribute.cs
new file mode 100644
index 0000000..975d8b0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/InflectionAttribute.cs
@@ -0,0 +1,34 @@
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Ja.TokenAttributes
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Attribute for Kuromoji inflection data.
+    /// <para/>
+    /// Note: in some cases this value may not be applicable,
+    /// and will be null.
+    /// </summary>
+    public interface IInflectionAttribute : IAttribute
+    {
+        /// <summary>Gets the inflection type of the current token; may be <c>null</c>.</summary>
+        string GetInflectionType();
+        /// <summary>Gets the inflection form of the current token; may be <c>null</c>.</summary>
+        string GetInflectionForm();
+        /// <summary>Sets the token that the inflection data is read from.</summary>
+        void SetToken(Token token);
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/InflectionAttributeImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/InflectionAttributeImpl.cs b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/InflectionAttributeImpl.cs
new file mode 100644
index 0000000..db96130
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/InflectionAttributeImpl.cs
@@ -0,0 +1,68 @@
+using Lucene.Net.Analysis.Ja.Util;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Ja.TokenAttributes
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Attribute for Kuromoji inflection data, backed by the most recently set <see cref="Token"/>.
+    /// </summary>
+    public class InflectionAttribute : Attribute, IInflectionAttribute
+    {
+        // Source token; null when none has been set or after Clear().
+        private Token token;
+
+        /// <summary>Inflection type of the current token, or <c>null</c> when no token is set.</summary>
+        public virtual string GetInflectionType()
+        {
+            if (token == null)
+            {
+                return null;
+            }
+            return token.GetInflectionType();
+        }
+
+        /// <summary>Inflection form of the current token, or <c>null</c> when no token is set.</summary>
+        public virtual string GetInflectionForm()
+        {
+            if (token == null)
+            {
+                return null;
+            }
+            return token.GetInflectionForm();
+        }
+
+        /// <summary>Stores the token that subsequent getters read from.</summary>
+        public virtual void SetToken(Token token)
+        {
+            this.token = token;
+        }
+
+        /// <summary>Forgets the current token.</summary>
+        public override void Clear()
+        {
+            token = null;
+        }
+
+        /// <summary>Hands the current token to <paramref name="target"/>, which must be an <see cref="InflectionAttribute"/>.</summary>
+        public override void CopyTo(IAttribute target)
+        {
+            ((InflectionAttribute)target).SetToken(token);
+        }
+
+        /// <summary>
+        /// Reports the inflection type and form, each followed by its translation from
+        /// <see cref="ToStringUtil"/> under the matching "(en)" key.
+        /// </summary>
+        public override void ReflectWith(IAttributeReflector reflector)
+        {
+            string inflectionType = GetInflectionType();
+            string inflectionTypeEnglish = null;
+            if (inflectionType != null)
+            {
+                inflectionTypeEnglish = ToStringUtil.GetInflectionTypeTranslation(inflectionType);
+            }
+            reflector.Reflect<IInflectionAttribute>("inflectionType", inflectionType);
+            reflector.Reflect<IInflectionAttribute>("inflectionType (en)", inflectionTypeEnglish);
+
+            string inflectionForm = GetInflectionForm();
+            string inflectionFormEnglish = null;
+            if (inflectionForm != null)
+            {
+                inflectionFormEnglish = ToStringUtil.GetInflectedFormTranslation(inflectionForm);
+            }
+            reflector.Reflect<IInflectionAttribute>("inflectionForm", inflectionForm);
+            reflector.Reflect<IInflectionAttribute>("inflectionForm (en)", inflectionFormEnglish);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/PartOfSpeechAttribute.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/PartOfSpeechAttribute.cs b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/PartOfSpeechAttribute.cs
new file mode 100644
index 0000000..722d203
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/PartOfSpeechAttribute.cs
@@ -0,0 +1,30 @@
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Ja.TokenAttributes
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Attribute for <see cref="Token.GetPartOfSpeech()"/>.
+    /// </summary>
+    public interface IPartOfSpeechAttribute : IAttribute
+    {
+        /// <summary>Gets the part of speech of the current token.</summary>
+        string GetPartOfSpeech();
+        /// <summary>Sets the token that the part of speech is read from.</summary>
+        void SetToken(Token token);
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/PartOfSpeechAttributeImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/PartOfSpeechAttributeImpl.cs b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/PartOfSpeechAttributeImpl.cs
new file mode 100644
index 0000000..9d0451d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/PartOfSpeechAttributeImpl.cs
@@ -0,0 +1,59 @@
+using Lucene.Net.Analysis.Ja.Util;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Ja.TokenAttributes
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Attribute exposing <see cref="Token.GetPartOfSpeech()"/> of the most recently set token.
+    /// </summary>
+    public class PartOfSpeechAttribute : Attribute, IPartOfSpeechAttribute
+    {
+        // Source token; null when none has been set or after Clear().
+        private Token token;
+
+        /// <summary>Part of speech of the current token, or <c>null</c> when no token is set.</summary>
+        public virtual string GetPartOfSpeech()
+        {
+            if (token == null)
+            {
+                return null;
+            }
+            return token.GetPartOfSpeech();
+        }
+
+        /// <summary>Stores the token that <see cref="GetPartOfSpeech"/> reads from.</summary>
+        public virtual void SetToken(Token token)
+        {
+            this.token = token;
+        }
+
+        /// <summary>Forgets the current token.</summary>
+        public override void Clear()
+        {
+            token = null;
+        }
+
+        /// <summary>Hands the current token to <paramref name="target"/>, which must be a <see cref="PartOfSpeechAttribute"/>.</summary>
+        public override void CopyTo(IAttribute target)
+        {
+            ((PartOfSpeechAttribute)target).SetToken(token);
+        }
+
+        /// <summary>
+        /// Reports the part of speech and, under "partOfSpeech (en)", its translation from
+        /// <see cref="ToStringUtil.GetPOSTranslation"/> (<c>null</c> when there is no value).
+        /// </summary>
+        public override void ReflectWith(IAttributeReflector reflector)
+        {
+            string pos = GetPartOfSpeech();
+            string posEnglish = null;
+            if (pos != null)
+            {
+                posEnglish = ToStringUtil.GetPOSTranslation(pos);
+            }
+            reflector.Reflect<IPartOfSpeechAttribute>("partOfSpeech", pos);
+            reflector.Reflect<IPartOfSpeechAttribute>("partOfSpeech (en)", posEnglish);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/ReadingAttribute.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/ReadingAttribute.cs b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/ReadingAttribute.cs
new file mode 100644
index 0000000..2432872
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/ReadingAttribute.cs
@@ -0,0 +1,34 @@
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Ja.TokenAttributes
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Attribute for Kuromoji reading data
+    /// <para/>
+    /// Note: in some cases this value may not be applicable,
+    /// and will be null.
+    /// </summary>
+    public interface IReadingAttribute : IAttribute
+    {
+        /// <summary>Gets the reading of the current token; may be <c>null</c>.</summary>
+        string GetReading();
+        /// <summary>Gets the pronunciation of the current token; may be <c>null</c>.</summary>
+        string GetPronunciation();
+        /// <summary>Sets the token that the reading data is read from.</summary>
+        void SetToken(Token token);
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/ReadingAttributeImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/ReadingAttributeImpl.cs b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/ReadingAttributeImpl.cs
new file mode 100644
index 0000000..bfb8a93
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/TokenAttributes/ReadingAttributeImpl.cs
@@ -0,0 +1,68 @@
+using Lucene.Net.Analysis.Ja.Util;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Ja.TokenAttributes
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Attribute for Kuromoji reading data, backed by the most recently set <see cref="Token"/>.
+    /// </summary>
+    public class ReadingAttribute : Attribute, IReadingAttribute
+    {
+        // Source token; null when none has been set or after Clear().
+        private Token token;
+
+        /// <summary>Reading of the current token, or <c>null</c> when no token is set.</summary>
+        public virtual string GetReading()
+        {
+            if (token == null)
+            {
+                return null;
+            }
+            return token.GetReading();
+        }
+
+        /// <summary>Pronunciation of the current token, or <c>null</c> when no token is set.</summary>
+        public virtual string GetPronunciation()
+        {
+            if (token == null)
+            {
+                return null;
+            }
+            return token.GetPronunciation();
+        }
+
+        /// <summary>Stores the token that subsequent getters read from.</summary>
+        public virtual void SetToken(Token token)
+        {
+            this.token = token;
+        }
+
+        /// <summary>Forgets the current token.</summary>
+        public override void Clear()
+        {
+            token = null;
+        }
+
+        /// <summary>Hands the current token to <paramref name="target"/>, which must be a <see cref="ReadingAttribute"/>.</summary>
+        public override void CopyTo(IAttribute target)
+        {
+            ((ReadingAttribute)target).SetToken(token);
+        }
+
+        /// <summary>
+        /// Reports the reading and pronunciation, each paired with the result of
+        /// <see cref="ToStringUtil.GetRomanization"/> under the matching "(en)" key.
+        /// </summary>
+        public override void ReflectWith(IAttributeReflector reflector)
+        {
+            string readingText = GetReading();
+            string readingRomanized = null;
+            if (readingText != null)
+            {
+                readingRomanized = ToStringUtil.GetRomanization(readingText);
+            }
+
+            string pronunciationText = GetPronunciation();
+            string pronunciationRomanized = null;
+            if (pronunciationText != null)
+            {
+                pronunciationRomanized = ToStringUtil.GetRomanization(pronunciationText);
+            }
+
+            reflector.Reflect<IReadingAttribute>("reading", readingText);
+            reflector.Reflect<IReadingAttribute>("reading (en)", readingRomanized);
+            reflector.Reflect<IReadingAttribute>("pronunciation", pronunciationText);
+            reflector.Reflect<IReadingAttribute>("pronunciation (en)", pronunciationRomanized);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Tools/BinaryDictionaryWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Tools/BinaryDictionaryWriter.cs b/src/Lucene.Net.Analysis.Kuromoji/Tools/BinaryDictionaryWriter.cs
new file mode 100644
index 0000000..39d36eb
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Tools/BinaryDictionaryWriter.cs
@@ -0,0 +1,370 @@
+using Lucene.Net.Analysis.Ja.Dict;
+using Lucene.Net.Codecs;
+using Lucene.Net.Store;
+using Lucene.Net.Support.IO;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Globalization;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public abstract class BinaryDictionaryWriter
+    {
+        protected readonly Type m_implClazz;
+        protected ByteBuffer m_buffer;
+        private int targetMapEndOffset = 0, lastWordId = -1, lastSourceId = -1;
+        private int[] targetMap = new int[8192];
+        private int[] targetMapOffsets = new int[8192];
+        private readonly List<string> posDict = new List<string>();
+
+        /// <summary>
+        /// Creates a writer with an entry buffer of the given initial capacity.
+        /// </summary>
+        /// <param name="implClazz">Type whose full name is used to derive the output file names.</param>
+        /// <param name="size">Initial capacity of the entry buffer, in bytes.</param>
+        public BinaryDictionaryWriter(Type implClazz, int size)
+        {
+            m_buffer = ByteBuffer.Allocate(size);
+            m_implClazz = implClazz;
+        }
+
+        /// <summary>
+        /// Put the entry in map.
+        /// </summary>
+        /// <remarks>
+        /// Fields read from <paramref name="entry"/>: [0] surface form, [1] left id, [2] right id,
+        /// [3] word cost, [4]-[7] part-of-speech levels, [8] and [9] two further fields stored next to
+        /// the POS string, [10] base form, [11] reading, [12] pronunciation. A field equal to
+        /// <c>*</c> is treated as empty where the code checks for it.
+        /// <para/>
+        /// Written to the buffer: one short holding <c>leftId &lt;&lt; 3 | flags</c>, one short holding
+        /// the word cost, then the base form, reading and pronunciation, each only when its flag is set.
+        /// </remarks>
+        /// <param name="entry">Fields of one dictionary line; indexes 0 through 12 are read.</param>
+        /// <returns>Current position of buffer, which will be wordId of next entry.</returns>
+        public virtual int Put(string[] entry)
+        {
+            short leftId = short.Parse(entry[1], CultureInfo.InvariantCulture);
+            short rightId = short.Parse(entry[2], CultureInfo.InvariantCulture);
+            short wordCost = short.Parse(entry[3], CultureInfo.InvariantCulture);
+
+            string fullPOSData = BuildFullPosData(entry);
+
+            string baseForm = entry[10];
+            string reading = entry[11];
+            string pronunciation = entry[12];
+
+            // worst case: two short, 3 bytes, and features (all as utf-16)
+            EnsureRemaining(4 + 3 + 2 * (baseForm.Length + reading.Length + pronunciation.Length));
+
+            // A feature is only stored when it cannot be derived from the surface form / reading.
+            int flags = 0;
+            if (!("*".Equals(baseForm, StringComparison.Ordinal) || baseForm.Equals(entry[0])))
+            {
+                flags |= BinaryDictionary.HAS_BASEFORM;
+            }
+            if (!reading.Equals(ToKatakana(entry[0])))
+            {
+                flags |= BinaryDictionary.HAS_READING;
+            }
+            if (!pronunciation.Equals(reading))
+            {
+                flags |= BinaryDictionary.HAS_PRONUNCIATION;
+            }
+
+            Debug.Assert(leftId == rightId);
+            Debug.Assert(leftId < 4096); // there are still unused bits
+
+            // add pos mapping: pad the list so that index leftId exists
+            int toFill = 1 + leftId - posDict.Count;
+            for (int i = 0; i < toFill; i++)
+            {
+                posDict.Add(null);
+            }
+
+            string existing = posDict[leftId];
+            Debug.Assert(existing == null || existing.Equals(fullPOSData));
+            posDict[leftId] = fullPOSData;
+
+            m_buffer.PutInt16((short)(leftId << 3 | flags));
+            m_buffer.PutInt16(wordCost);
+
+            if ((flags & BinaryDictionary.HAS_BASEFORM) != 0)
+            {
+                // One byte: high nibble = chars shared with the surface form, low nibble = suffix length.
+                Debug.Assert(baseForm.Length < 16);
+                int shared = SharedPrefix(entry[0], baseForm);
+                int suffix = baseForm.Length - shared;
+                m_buffer.Put((byte)(shared << 4 | suffix));
+                for (int i = shared; i < baseForm.Length; i++)
+                {
+                    m_buffer.PutChar(baseForm[i]);
+                }
+            }
+
+            if ((flags & BinaryDictionary.HAS_READING) != 0)
+            {
+                WriteKanaFeature(reading);
+            }
+
+            if ((flags & BinaryDictionary.HAS_PRONUNCIATION) != 0)
+            {
+                // we can save 150KB here, but it makes the reader a little complicated.
+                // int shared = sharedPrefix(reading, pronunciation);
+                // buffer.put((byte) shared);
+                // pronunciation = pronunciation.substring(shared);
+                WriteKanaFeature(pronunciation);
+            }
+
+            return m_buffer.Position;
+        }
+
+        /// <summary>
+        /// Builds the three-column CSV string kept per left id: the non-<c>*</c> parts of
+        /// entry[4]..entry[7] joined with '-', then entry[8] and entry[9] (left empty when <c>*</c>).
+        /// </summary>
+        private static string BuildFullPosData(string[] entry)
+        {
+            StringBuilder sb = new StringBuilder();
+
+            // build up the POS string
+            for (int i = 4; i < 8; i++)
+            {
+                string part = entry[i];
+                Debug.Assert(part.Length > 0);
+                if (!"*".Equals(part, StringComparison.Ordinal))
+                {
+                    if (sb.Length > 0)
+                    {
+                        sb.Append('-');
+                    }
+                    sb.Append(part);
+                }
+            }
+
+            string posData = sb.ToString();
+
+            sb.Length = 0;
+            sb.Append(CSVUtil.QuoteEscape(posData));
+            sb.Append(',');
+            if (!"*".Equals(entry[8], StringComparison.Ordinal))
+            {
+                sb.Append(CSVUtil.QuoteEscape(entry[8]));
+            }
+            sb.Append(',');
+            if (!"*".Equals(entry[9], StringComparison.Ordinal))
+            {
+                sb.Append(CSVUtil.QuoteEscape(entry[9]));
+            }
+            return sb.ToString();
+        }
+
+        /// <summary>
+        /// Replaces the buffer with a larger copy when fewer than <paramref name="worstCase"/> bytes remain.
+        /// </summary>
+        private void EnsureRemaining(int worstCase)
+        {
+            int left = m_buffer.Remaining;
+            if (worstCase > left)
+            {
+                ByteBuffer newBuffer = ByteBuffer.Allocate(ArrayUtil.Oversize(m_buffer.Limit + worstCase - left, 1));
+                m_buffer.Flip();
+                newBuffer.Put(m_buffer);
+                m_buffer = newBuffer;
+            }
+        }
+
+        /// <summary>
+        /// Writes a reading or pronunciation: one header byte (<c>length &lt;&lt; 1</c>, low bit set when
+        /// the text is all katakana) followed by one byte per char for katakana or one char each otherwise.
+        /// </summary>
+        private void WriteKanaFeature(string text)
+        {
+            if (IsKatakana(text))
+            {
+                m_buffer.Put((byte)(text.Length << 1 | 1));
+                WriteKatakana(text);
+            }
+            else
+            {
+                m_buffer.Put((byte)(text.Length << 1));
+                for (int i = 0; i < text.Length; i++)
+                {
+                    m_buffer.PutChar(text[i]);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Tells whether every char of <paramref name="s"/> lies in U+30A0..U+30FF.
+        /// An empty string yields <c>true</c>.
+        /// </summary>
+        private bool IsKatakana(string s)
+        {
+            foreach (char ch in s)
+            {
+                bool inRange = ch >= 0x30A0 && ch <= 0x30FF;
+                if (!inRange)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Writes each char of <paramref name="s"/> to the buffer as a single byte holding its offset from U+30A0.
+        /// </summary>
+        private void WriteKatakana(string s)
+        {
+            foreach (char ch in s)
+            {
+                m_buffer.Put((byte)(ch - 0x30A0));
+            }
+        }
+
+        /// <summary>
+        /// Returns a copy of <paramref name="s"/> in which every char strictly between U+3040 and U+3097
+        /// is shifted up by 0x60; all other chars are kept as they are.
+        /// </summary>
+        private string ToKatakana(string s)
+        {
+            char[] text = s.ToCharArray();
+            for (int i = 0; i < text.Length; i++)
+            {
+                char ch = text[i];
+                if (ch > 0x3040 && ch < 0x3097)
+                {
+                    text[i] = (char)(ch + 0x60);
+                }
+            }
+            return new string(text);
+        }
+
+        /// <summary>
+        /// Counts the leading chars that <paramref name="left"/> and <paramref name="right"/> have in common.
+        /// </summary>
+        /// <returns>Length of the common prefix; at most the length of the shorter string.</returns>
+        public static int SharedPrefix(string left, string right)
+        {
+            int limit = Math.Min(left.Length, right.Length);
+            int matched = 0;
+            while (matched < limit && left[matched] == right[matched])
+            {
+                matched++;
+            }
+            return matched;
+        }
+
+        /// <summary>
+        /// Appends <paramref name="wordId"/> to the target map and, when <paramref name="sourceId"/> is
+        /// new, records where its run of word ids starts.
+        /// </summary>
+        /// <remarks>
+        /// Debug builds assert that word ids are strictly increasing and that source ids never decrease.
+        /// Source ids skipped between the previous call and this one get the same start offset as
+        /// <paramref name="sourceId"/>.
+        /// </remarks>
+        /// <param name="sourceId">Source id the word belongs to.</param>
+        /// <param name="wordId">Word id to append.</param>
+        public virtual void AddMapping(int sourceId, int wordId)
+        {
+            Debug.Assert(wordId > lastWordId, "words out of order: " + wordId + " vs lastID: " + lastWordId);
+
+            if (sourceId <= lastSourceId)
+            {
+                Debug.Assert(sourceId == lastSourceId);
+            }
+            else
+            {
+                Debug.Assert(sourceId > lastSourceId, "source ids out of order: lastSourceId=" + lastSourceId + " vs sourceId=" + sourceId);
+                targetMapOffsets = ArrayUtil.Grow(targetMapOffsets, sourceId + 1);
+                int next = lastSourceId + 1;
+                while (next <= sourceId)
+                {
+                    targetMapOffsets[next] = targetMapEndOffset;
+                    next++;
+                }
+            }
+
+            targetMap = ArrayUtil.Grow(targetMap, targetMapEndOffset + 1);
+            targetMap[targetMapEndOffset++] = wordId;
+
+            lastWordId = wordId;
+            lastSourceId = sourceId;
+        }
+
+        /// <summary>
+        /// Builds the extension-less output path: <paramref name="baseDir"/>, a directory separator, and
+        /// the full name of the implementation type with every '.' turned into a directory separator.
+        /// </summary>
+        protected string GetBaseFileName(string baseDir)
+        {
+            char separator = System.IO.Path.DirectorySeparatorChar;
+            string typePath = m_implClazz.FullName.Replace('.', separator);
+            return baseDir + separator + typePath;
+        }
+
+        /// <summary>
+        /// Writes the dictionary, the target map and the POS dictionary to files under <paramref name="baseDir"/>.
+        /// </summary>
+        /// <remarks>
+        /// All three file names share the base name returned by <see cref="GetBaseFileName(string)"/> and differ
+        /// only in the suffix taken from <see cref="BinaryDictionary"/>. The files are written in the order
+        /// dictionary, target map, POS dictionary.
+        /// </remarks>
+        /// <param name="baseDir">Directory under which the files are created.</param>
+        /// <exception cref="IOException">If an I/O error occurs writing the dictionary files.</exception>
+        public virtual void Write(string baseDir)
+        {
+            string prefix = GetBaseFileName(baseDir);
+
+            string dictionaryFile = prefix + BinaryDictionary.DICT_FILENAME_SUFFIX;
+            WriteDictionary(dictionaryFile);
+
+            string targetMapFile = prefix + BinaryDictionary.TARGETMAP_FILENAME_SUFFIX;
+            WriteTargetMap(targetMapFile);
+
+            string posDictFile = prefix + BinaryDictionary.POSDICT_FILENAME_SUFFIX;
+            WritePosDict(posDictFile);
+        }
+
+        // TODO: maybe this int[] should instead be the output to the FST...
+        /// <summary>
+        /// Writes the target map to <paramref name="filename"/>, creating the parent directory first.
+        /// </summary>
+        /// <remarks>
+        /// After the codec header come the number of word ids and the number of source ids plus one.
+        /// Each word id then follows as a VInt holding the delta to the previous word id shifted left
+        /// by one; the low bit is set on the first word id of each source id.
+        /// </remarks>
+        protected virtual void WriteTargetMap(string filename)
+        {
+            //new File(filename).getParentFile().mkdirs();
+            System.IO.Directory.CreateDirectory(System.IO.Path.GetDirectoryName(filename));
+            using (Stream os = new FileStream(filename, FileMode.Create, FileAccess.Write))
+            {
+                DataOutput @out = new OutputStreamDataOutput(os);
+                CodecUtil.WriteHeader(@out, BinaryDictionary.TARGETMAP_HEADER, BinaryDictionary.VERSION);
+
+                int numSourceIds = lastSourceId + 1;
+                @out.WriteVInt32(targetMapEndOffset); // <-- size of main array
+                @out.WriteVInt32(numSourceIds + 1); // <-- size of offset array (+ 1 more entry)
+
+                int previousWordId = 0;
+                int nextSourceId = 0;
+                for (int index = 0; index < targetMapEndOffset; index++)
+                {
+                    int wordId = targetMap[index];
+                    int delta = wordId - previousWordId;
+                    Debug.Assert(delta >= 0);
+
+                    int encoded = delta << 1;
+                    if (index == targetMapOffsets[nextSourceId])
+                    {
+                        // this word id opens the run of the next source id
+                        encoded |= 0x01;
+                        nextSourceId++;
+                    }
+                    @out.WriteVInt32(encoded);
+
+                    previousWordId = wordId;
+                }
+                Debug.Assert(nextSourceId == numSourceIds, "sourceId:" + nextSourceId + " != numSourceIds:" + numSourceIds);
+            }
+        }
+
+        /// <summary>
+        /// Writes the POS dictionary to <paramref name="filename"/>, creating the parent directory first.
+        /// </summary>
+        /// <remarks>
+        /// After the codec header comes the number of slots. Each slot is written as three strings
+        /// parsed from its CSV form; an unused (<c>null</c>) slot is written as three zero bytes.
+        /// </remarks>
+        protected virtual void WritePosDict(string filename)
+        {
+            //new File(filename).getParentFile().mkdirs();
+            System.IO.Directory.CreateDirectory(System.IO.Path.GetDirectoryName(filename));
+            using (Stream os = new FileStream(filename, FileMode.Create, FileAccess.Write))
+            {
+                DataOutput @out = new OutputStreamDataOutput(os);
+                CodecUtil.WriteHeader(@out, BinaryDictionary.POSDICT_HEADER, BinaryDictionary.VERSION);
+                @out.WriteVInt32(posDict.Count);
+                foreach (string s in posDict)
+                {
+                    if (s != null)
+                    {
+                        string[] data = CSVUtil.Parse(s);
+                        Debug.Assert(data.Length == 3, "malformed pos/inflection: " + s);
+                        for (int column = 0; column < 3; column++)
+                        {
+                            @out.WriteString(data[column]);
+                        }
+                        continue;
+                    }
+
+                    for (int column = 0; column < 3; column++)
+                    {
+                        @out.WriteByte((byte)0);
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Writes the entry buffer to <paramref name="filename"/>, creating the parent directory first.
+        /// </summary>
+        /// <remarks>
+        /// After the codec header comes the number of buffered bytes as a VInt, followed by the bytes
+        /// themselves. The buffer is flipped and fully consumed by this call.
+        /// </remarks>
+        protected virtual void WriteDictionary(string filename)
+        {
+            //new File(filename).getParentFile().mkdirs();
+            System.IO.Directory.CreateDirectory(System.IO.Path.GetDirectoryName(filename));
+            using (Stream os = new FileStream(filename, FileMode.Create, FileAccess.Write))
+            {
+                DataOutput @out = new OutputStreamDataOutput(os);
+                CodecUtil.WriteHeader(@out, BinaryDictionary.DICT_HEADER, BinaryDictionary.VERSION);
+                @out.WriteVInt32(m_buffer.Position);
+
+                //WritableByteChannel channel = Channels.newChannel(os);
+                // Write Buffer
+                m_buffer.Flip();  // set position to 0, set limit to current position
+                //channel.write(buffer);
+
+                // Drain through Get() so the buffer position advances as the bytes are written;
+                // writing from the backing array would leave the position at 0 and trip the assert below.
+                while (m_buffer.Remaining > 0)
+                {
+                    @out.WriteByte(m_buffer.Get());
+                }
+
+                Debug.Assert(m_buffer.Remaining == 0L);
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Tools/CharacterDefinitionWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Tools/CharacterDefinitionWriter.cs b/src/Lucene.Net.Analysis.Kuromoji/Tools/CharacterDefinitionWriter.cs
new file mode 100644
index 0000000..6d5526d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Tools/CharacterDefinitionWriter.cs
@@ -0,0 +1,91 @@
+using Lucene.Net.Analysis.Ja.Dict;
+using Lucene.Net.Codecs;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Collects character-class data and writes it to the character definition file.
+    /// </summary>
+    public sealed class CharacterDefinitionWriter
+    {
+        // One class byte per 16-bit code value.
+        private readonly byte[] characterCategoryMap = new byte[0x10000];
+
+        // Per character class: the "invoke" and "group" switches set through PutInvokeDefinition.
+        private readonly bool[] invokeMap = new bool[CharacterDefinition.CLASS_COUNT];
+        private readonly bool[] groupMap = new bool[CharacterDefinition.CLASS_COUNT];
+
+        /// <summary>
+        /// Constructor for building. TODO: remove write access
+        /// </summary>
+        public CharacterDefinitionWriter()
+        {
+            // every code value starts out in the default class
+            Arrays.Fill(characterCategoryMap, CharacterDefinition.DEFAULT);
+        }
+
+        /// <summary>
+        /// Put mapping from unicode code point to character class.
+        /// </summary>
+        /// <param name="codePoint">Code point; used as an index into a table of 0x10000 entries.</param>
+        /// <param name="characterClassName">Character class name. When several space-separated names are given, only the first is used.</param>
+        public void PutCharacterCategory(int codePoint, string characterClassName)
+        {
+            // use first category class
+            characterClassName = characterClassName.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)[0];
+
+            // Override Nakaguro
+            if (codePoint == 0x30FB)
+            {
+                characterClassName = "SYMBOL";
+            }
+            characterCategoryMap[codePoint] = CharacterDefinition.LookupCharacterClass(characterClassName);
+        }
+
+        /// <summary>
+        /// Records the invoke and group switches of a character class.
+        /// </summary>
+        /// <param name="characterClassName">Character class name.</param>
+        /// <param name="invoke">The invoke switch is on when this is 1.</param>
+        /// <param name="group">The group switch is on when this is 1.</param>
+        /// <param name="length">Currently ignored.</param>
+        public void PutInvokeDefinition(string characterClassName, int invoke, int group, int length)
+        {
+            byte characterClass = CharacterDefinition.LookupCharacterClass(characterClassName);
+            invokeMap[characterClass] = invoke == 1;
+            groupMap[characterClass] = group == 1;
+            // TODO: length def ignored
+        }
+
+        /// <summary>
+        /// Writes the character definition file under <paramref name="baseDir"/>.
+        /// </summary>
+        /// <remarks>
+        /// The file path is <paramref name="baseDir"/> followed by the full name of
+        /// <see cref="CharacterDefinition"/> with '.' replaced by the directory separator, plus the
+        /// file name suffix. The file holds the codec header, the class table, and one byte per
+        /// character class with bit 0 = invoke and bit 1 = group.
+        /// </remarks>
+        /// <param name="baseDir">Directory under which the file is created.</param>
+        public void Write(string baseDir)
+        {
+            string filename = baseDir + System.IO.Path.DirectorySeparatorChar +
+                typeof(CharacterDefinition).FullName.Replace('.', System.IO.Path.DirectorySeparatorChar) + CharacterDefinition.FILENAME_SUFFIX;
+            //new File(filename).getParentFile().mkdirs();
+            // Create the directory that will contain the file itself (not the parent of baseDir),
+            // otherwise opening the FileStream below fails when the type-name subdirectories are missing.
+            System.IO.Directory.CreateDirectory(System.IO.Path.GetDirectoryName(filename));
+            using (Stream os = new FileStream(filename, FileMode.Create, FileAccess.Write))
+            {
+                DataOutput @out = new OutputStreamDataOutput(os);
+                CodecUtil.WriteHeader(@out, CharacterDefinition.HEADER, CharacterDefinition.VERSION);
+                @out.WriteBytes(characterCategoryMap, 0, characterCategoryMap.Length);
+                for (int i = 0; i < CharacterDefinition.CLASS_COUNT; i++)
+                {
+                    byte b = (byte)(
+                      (invokeMap[i] ? 0x01 : 0x00) |
+                      (groupMap[i] ? 0x02 : 0x00)
+                    );
+                    @out.WriteByte(b);
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsBuilder.cs b/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsBuilder.cs
new file mode 100644
index 0000000..235c523
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsBuilder.cs
@@ -0,0 +1,68 @@
+using System.Diagnostics;
+using System.Globalization;
+using System.IO;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Reads a connection cost matrix from a text file into a <see cref="ConnectionCostsWriter"/>.
+    /// </summary>
+    public class ConnectionCostsBuilder
+    {
+        private static readonly Regex whiteSpaceRegex = new Regex("\\s+", RegexOptions.Compiled);
+
+        // Static-only type: not meant to be instantiated.
+        private ConnectionCostsBuilder()
+        {
+        }
+
+        /// <summary>
+        /// Parses the file at <paramref name="filename"/>.
+        /// </summary>
+        /// <remarks>
+        /// The first line holds the forward and backward sizes; every following line holds a forward id,
+        /// a backward id and a cost. Fields are separated by whitespace and the file is read as ASCII.
+        /// </remarks>
+        /// <param name="filename">Path of the matrix file.</param>
+        /// <returns>A writer populated with every cost found in the file.</returns>
+        public static ConnectionCostsWriter Build(string filename)
+        {
+            using (Stream inputStream = new FileStream(filename, FileMode.Open, FileAccess.Read))
+            {
+                StreamReader reader = new StreamReader(inputStream, Encoding.ASCII);
+
+                string[] header = whiteSpaceRegex.Split(reader.ReadLine());
+                Debug.Assert(header.Length == 2);
+
+                int forwardSize = int.Parse(header[0], CultureInfo.InvariantCulture);
+                int backwardSize = int.Parse(header[1], CultureInfo.InvariantCulture);
+                Debug.Assert(forwardSize > 0 && backwardSize > 0);
+
+                ConnectionCostsWriter result = new ConnectionCostsWriter(forwardSize, backwardSize);
+
+                for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
+                {
+                    string[] columns = whiteSpaceRegex.Split(row);
+                    Debug.Assert(columns.Length == 3);
+
+                    result.Add(
+                        int.Parse(columns[0], CultureInfo.InvariantCulture),
+                        int.Parse(columns[1], CultureInfo.InvariantCulture),
+                        int.Parse(columns[2], CultureInfo.InvariantCulture));
+                }
+                return result;
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsWriter.cs b/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsWriter.cs
new file mode 100644
index 0000000..bd1376e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsWriter.cs
@@ -0,0 +1,74 @@
+using Lucene.Net.Analysis.Ja.Dict;
+using Lucene.Net.Codecs;
+using Lucene.Net.Store;
+using System.Diagnostics;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Holds a matrix of connection costs and writes it to the connection costs file.
+    /// </summary>
+    public sealed class ConnectionCostsWriter
+    {
+        private readonly short[][] costs; // array is backward IDs first since get is called using the same backward ID consecutively. maybe doesn't matter.
+        private readonly int forwardSize;
+        private readonly int backwardSize;
+
+        /// <summary>
+        /// Constructor for building. TODO: remove write access
+        /// </summary>
+        /// <param name="forwardSize">Number of forward ids (length of each row).</param>
+        /// <param name="backwardSize">Number of backward ids (number of rows).</param>
+        public ConnectionCostsWriter(int forwardSize, int backwardSize)
+        {
+            this.forwardSize = forwardSize;
+            this.backwardSize = backwardSize;
+            //this.costs = new short[backwardSize][forwardSize];
+            this.costs = Support.RectangularArrays.ReturnRectangularArray<short>(backwardSize, forwardSize);
+        }
+
+        /// <summary>
+        /// Stores the cost for one (forward id, backward id) pair, narrowed to a short.
+        /// </summary>
+        public void Add(int forwardId, int backwardId, int cost)
+        {
+            short[] row = this.costs[backwardId];
+            row[forwardId] = (short)cost;
+        }
+
+        /// <summary>
+        /// Writes the connection costs file under <paramref name="baseDir"/>, creating its directory first.
+        /// </summary>
+        /// <remarks>
+        /// After the codec header come the forward and backward sizes. The costs follow row by row,
+        /// each as the zig-zag encoded delta to the previously written cost (starting from 0).
+        /// </remarks>
+        /// <param name="baseDir">Directory under which the file is created.</param>
+        public void Write(string baseDir)
+        {
+            string filename = baseDir + System.IO.Path.DirectorySeparatorChar +
+                typeof(ConnectionCosts).FullName.Replace('.', System.IO.Path.DirectorySeparatorChar) + ConnectionCosts.FILENAME_SUFFIX;
+            //new File(filename).getParentFile().mkdirs();
+            System.IO.Directory.CreateDirectory(System.IO.Path.GetDirectoryName(filename));
+            using (Stream os = new FileStream(filename, FileMode.Create, FileAccess.Write))
+            {
+                DataOutput @out = new OutputStreamDataOutput(os);
+                CodecUtil.WriteHeader(@out, ConnectionCosts.HEADER, ConnectionCosts.VERSION);
+                @out.WriteVInt32(forwardSize);
+                @out.WriteVInt32(backwardSize);
+
+                Debug.Assert(costs.Length == backwardSize);
+                int previous = 0;
+                for (int backwardId = 0; backwardId < costs.Length; backwardId++)
+                {
+                    short[] row = costs[backwardId];
+                    Debug.Assert(row.Length == forwardSize);
+                    foreach (short cost in row)
+                    {
+                        int delta = (int)cost - previous;
+                        @out.WriteVInt32((delta << 1) ^ (delta >> 31));
+                        previous = cost;
+                    }
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Tools/DictionaryBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Tools/DictionaryBuilder.cs b/src/Lucene.Net.Analysis.Kuromoji/Tools/DictionaryBuilder.cs
new file mode 100644
index 0000000..c105039
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Tools/DictionaryBuilder.cs
@@ -0,0 +1,92 @@
+using System;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Command-line entry point that builds the token info dictionary, the unknown word dictionary
+    /// and the connection costs from an input directory.
+    /// </summary>
+    public class DictionaryBuilder
+    {
+        /// <summary>Supported source dictionary formats.</summary>
+        public enum DictionaryFormat { IPADIC, UNIDIC };
+
+        // Static-only type: not meant to be instantiated.
+        private DictionaryBuilder()
+        {
+        }
+
+        /// <summary>
+        /// Builds all three dictionary parts and writes them under <paramref name="outputDirname"/>.
+        /// Progress is reported on the console.
+        /// </summary>
+        /// <param name="format">Format of the source dictionary.</param>
+        /// <param name="inputDirname">Directory holding the source files; the connection costs are read from <c>matrix.def</c> in it.</param>
+        /// <param name="outputDirname">Directory the binary files are written under.</param>
+        /// <param name="encoding">Name of the encoding of the source files.</param>
+        /// <param name="normalizeEntry">Passed to the token info dictionary builder.</param>
+        public static void Build(DictionaryFormat format,
+            string inputDirname,
+            string outputDirname,
+            string encoding,
+            bool normalizeEntry)
+        {
+            Console.WriteLine("building tokeninfo dict...");
+            TokenInfoDictionaryBuilder tokenInfoBuilder = new TokenInfoDictionaryBuilder(format, encoding, normalizeEntry);
+            TokenInfoDictionaryWriter tokenInfoDictionary = tokenInfoBuilder.Build(inputDirname);
+            tokenInfoDictionary.Write(outputDirname);
+            Console.WriteLine("done");
+
+            Console.WriteLine("building unknown word dict...");
+            UnknownDictionaryBuilder unkBuilder = new UnknownDictionaryBuilder(encoding);
+            UnknownDictionaryWriter unkDictionary = unkBuilder.Build(inputDirname);
+            unkDictionary.Write(outputDirname);
+            Console.WriteLine("done");
+
+            Console.WriteLine("building connection costs...");
+            ConnectionCostsWriter connectionCosts
+                = ConnectionCostsBuilder.Build(inputDirname + System.IO.Path.DirectorySeparatorChar + "matrix.def");
+            connectionCosts.Write(outputDirname);
+            Console.WriteLine("done");
+        }
+
+        /// <summary>
+        /// Runs the builder from the command line.
+        /// </summary>
+        /// <param name="args">
+        /// [0] format (<c>ipadic</c> or <c>unidic</c>, case-insensitive; anything else falls back to IPADIC),
+        /// [1] input directory, [2] output directory, [3] input encoding, [4] normalize entries (<c>true</c>/<c>false</c>).
+        /// </param>
+        public static void Main(string[] args)
+        {
+            DictionaryFormat format;
+            if (args[0].Equals("ipadic", StringComparison.OrdinalIgnoreCase))
+            {
+                format = DictionaryFormat.IPADIC;
+            }
+            else if (args[0].Equals("unidic", StringComparison.OrdinalIgnoreCase))
+            {
+                format = DictionaryFormat.UNIDIC;
+            }
+            else
+            {
+                // The fallback is IPADIC, so the message must say so.
+                Console.Error.WriteLine("Illegal format " + args[0] + " using ipadic instead");
+                format = DictionaryFormat.IPADIC;
+            }
+
+            string inputDirname = args[1];
+            string outputDirname = args[2];
+            string inputEncoding = args[3];
+            bool normalizeEntries = bool.Parse(args[4]);
+
+            Console.WriteLine("dictionary builder");
+            Console.WriteLine();
+            Console.WriteLine("dictionary format: " + format);
+            Console.WriteLine("input directory: " + inputDirname);
+            Console.WriteLine("output directory: " + outputDirname);
+            Console.WriteLine("input encoding: " + inputEncoding);
+            Console.WriteLine("normalize entries: " + normalizeEntries);
+            Console.WriteLine();
+            DictionaryBuilder.Build(format, inputDirname, outputDirname, inputEncoding, normalizeEntries);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Tools/TokenInfoDictionaryBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Tools/TokenInfoDictionaryBuilder.cs b/src/Lucene.Net.Analysis.Kuromoji/Tools/TokenInfoDictionaryBuilder.cs
new file mode 100644
index 0000000..0406083
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Tools/TokenInfoDictionaryBuilder.cs
@@ -0,0 +1,230 @@
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Fst;
+using Lucene.Net.Util.Packed;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Reads the CSV files of an IPADIC or UniDic source dictionary and produces a
+    /// <see cref="TokenInfoDictionaryWriter"/> holding the encoded entries together with an FST
+    /// that maps each surface form to an ordinal.
+    /// </summary>
+    public class TokenInfoDictionaryBuilder
+    {
+        /// <summary>Internal word id - incrementally assigned as entries are read and added. This will be byte offset of dictionary file</summary>
+        private int offset = 0;
+
+        private string encoding = "euc-jp";
+
+        private bool normalizeEntries = false;
+        //private Normalizer2 normalizer;
+
+        private DictionaryBuilder.DictionaryFormat format = DictionaryBuilder.DictionaryFormat.IPADIC;
+
+        /// <summary>
+        /// Creates a builder.
+        /// </summary>
+        /// <param name="format">Layout of the source CSV entries (see <see cref="FormatEntry(string[])"/>).</param>
+        /// <param name="encoding">Name of the text encoding used to read the CSV files.</param>
+        /// <param name="normalizeEntries">If <c>true</c>, an NFKC-normalized copy is added for every entry whose surface form is not already NFKC-normalized.</param>
+        public TokenInfoDictionaryBuilder(DictionaryBuilder.DictionaryFormat format, string encoding, bool normalizeEntries)
+        {
+            this.format = format;
+            this.encoding = encoding;
+            this.normalizeEntries = normalizeEntries;
+            //this.normalizer = normalizeEntries ? Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE) : null;
+        }
+
+        /// <summary>
+        /// Builds the dictionary from every <c>*.csv</c> file found directly in <paramref name="dirname"/>.
+        /// </summary>
+        /// <param name="dirname">Directory containing the source CSV files.</param>
+        /// <returns>The populated dictionary writer.</returns>
+        public virtual TokenInfoDictionaryWriter Build(string dirname)
+        {
+            List<string> csvFiles = new List<string>();
+            foreach (FileInfo file in new DirectoryInfo(dirname).EnumerateFiles("*.csv"))
+            {
+                csvFiles.Add(file.FullName);
+            }
+            // Sort ordinally: the default string comparer is culture-sensitive, which would make
+            // the file order (and therefore the generated dictionary) depend on the current culture.
+            csvFiles.Sort(StringComparer.Ordinal);
+            return BuildDictionary(csvFiles);
+        }
+
+        /// <summary>
+        /// Parses the given CSV files, sorts the entries by surface form and encodes them.
+        /// </summary>
+        /// <param name="csvFiles">Paths of the CSV files to read, in the order they should be processed.</param>
+        /// <returns>The populated dictionary writer, with its FST set.</returns>
+        public virtual TokenInfoDictionaryWriter BuildDictionary(IList<string> csvFiles)
+        {
+            TokenInfoDictionaryWriter dictionary = new TokenInfoDictionaryWriter(10 * 1024 * 1024);
+
+            // all lines in the file
+            Console.WriteLine("  parse...");
+            List<string[]> lines = new List<string[]>(400000);
+            foreach (string file in csvFiles)
+            {
+                Encoding decoder = Encoding.GetEncoding(encoding);
+                using (Stream inputStream = new FileStream(file, FileMode.Open, FileAccess.Read))
+                using (TextReader reader = new StreamReader(inputStream, decoder))
+                {
+                    string line = null;
+                    while ((line = reader.ReadLine()) != null)
+                    {
+                        string[] entry = CSVUtil.Parse(line);
+
+                        if (entry.Length < 13)
+                        {
+                            Console.WriteLine("Entry in CSV is not valid: " + line);
+                            continue;
+                        }
+
+                        string[] formatted = FormatEntry(entry);
+                        lines.Add(formatted);
+
+                        // NFKC normalize dictionary entry
+                        if (normalizeEntries)
+                        {
+                            //if (normalizer.isNormalized(entry[0])){
+                            if (entry[0].IsNormalized(NormalizationForm.FormKC))
+                            {
+                                continue;
+                            }
+                            string[] normalizedEntry = new string[entry.Length];
+                            for (int i = 0; i < entry.Length; i++)
+                            {
+                                //normalizedEntry[i] = normalizer.normalize(entry[i]);
+                                normalizedEntry[i] = entry[i].Normalize(NormalizationForm.FormKC);
+                            }
+
+                            formatted = FormatEntry(normalizedEntry);
+                            lines.Add(formatted);
+                        }
+                    }
+                }
+            }
+
+            Console.WriteLine("  sort...");
+
+            // sort by term: we sorted the files already and need a stable sort, so that entries
+            // sharing a surface form keep the order in which they were read.
+            StableSortByTerm(lines);
+
+            Console.WriteLine("  encode...");
+
+            PositiveInt32Outputs fstOutput = PositiveInt32Outputs.Singleton;
+            Builder<long?> fstBuilder = new Builder<long?>(Lucene.Net.Util.Fst.FST.INPUT_TYPE.BYTE2, 0, 0, true, true, int.MaxValue, fstOutput, null, true, PackedInt32s.DEFAULT, true, 15);
+            Int32sRef scratch = new Int32sRef();
+            long ord = -1; // first ord will be 0
+            string lastValue = null;
+
+            // build tokeninfo dictionary
+            foreach (string[] entry in lines)
+            {
+                int next = dictionary.Put(entry);
+
+                // Put returning the unchanged offset is treated as "nothing was written"
+                if (next == offset)
+                {
+                    Console.WriteLine("Failed to process line: " + Collections.ToString(entry));
+                    continue;
+                }
+
+                string token = entry[0];
+                if (!token.Equals(lastValue, StringComparison.Ordinal))
+                {
+                    // new word to add to fst
+                    ord++;
+                    lastValue = token;
+                    scratch.Grow(token.Length);
+                    scratch.Length = token.Length;
+                    for (int i = 0; i < token.Length; i++)
+                    {
+                        scratch.Int32s[i] = (int)token[i];
+                    }
+                    fstBuilder.Add(scratch, ord);
+                }
+                dictionary.AddMapping((int)ord, offset);
+                offset = next;
+            }
+
+            FST<long?> fst = fstBuilder.Finish();
+
+            Console.WriteLine("  " + fst.NodeCount + " nodes, " + fst.ArcCount + " arcs, " + fst.GetSizeInBytes() + " bytes...  ");
+            dictionary.SetFST(fst);
+            Console.WriteLine(" done");
+
+            return dictionary;
+        }
+
+        /// <summary>
+        /// Sorts <paramref name="lines"/> in place by surface form (element 0) while preserving
+        /// the relative order of entries with equal surface forms.
+        /// </summary>
+        /// <remarks>
+        /// <c>List&lt;T&gt;.Sort</c> is documented as an unstable sort, so instead an index array is
+        /// sorted with the original position as tie-breaker and the list is rewritten from it.
+        /// </remarks>
+        private static void StableSortByTerm(List<string[]> lines)
+        {
+            string[][] snapshot = lines.ToArray();
+            int[] order = new int[snapshot.Length];
+            for (int i = 0; i < order.Length; i++)
+            {
+                order[i] = i;
+            }
+
+            Array.Sort(order, new StableTermComparer(snapshot));
+
+            for (int i = 0; i < order.Length; i++)
+            {
+                lines[i] = snapshot[order[i]];
+            }
+        }
+
+        /// <summary>Orders entries by the ordinal comparison of their surface form (element 0).</summary>
+        private class ComparerAnonymousHelper : IComparer<string[]>
+        {
+            public int Compare(string[] left, string[] right)
+            {
+                return left[0].CompareToOrdinal(right[0]);
+            }
+        }
+
+        /// <summary>
+        /// Compares positions within an entry array: first by surface form, then by position,
+        /// which makes any sort that uses it behave as a stable sort.
+        /// </summary>
+        private class StableTermComparer : IComparer<int>
+        {
+            private readonly string[][] entries;
+            private readonly ComparerAnonymousHelper termComparer = new ComparerAnonymousHelper();
+
+            public StableTermComparer(string[][] entries)
+            {
+                this.entries = entries;
+            }
+
+            public int Compare(int left, int right)
+            {
+                int cmp = termComparer.Compare(entries[left], entries[right]);
+                return cmp != 0 ? cmp : left.CompareTo(right);
+            }
+        }
+
+        /// <summary>
+        /// Converts a parsed CSV entry into the 13-field IPADIC layout.
+        /// <para/>
+        /// IPADIC features
+        /// 
+        /// 0   - surface
+        /// 1   - left cost
+        /// 2   - right cost
+        /// 3   - word cost
+        /// 4-9 - pos
+        /// 10  - base form
+        /// 11  - reading
+        /// 12  - pronunciation
+        /// 
+        /// UniDic features
+        /// 
+        /// 0   - surface
+        /// 1   - left cost
+        /// 2   - right cost
+        /// 3   - word cost
+        /// 4-9 - pos
+        /// 10  - base form reading
+        /// 11  - base form
+        /// 12  - surface form
+        /// 13  - surface reading
+        /// </summary>
+        /// <param name="features">Fields of one CSV entry.</param>
+        /// <returns>
+        /// <paramref name="features"/> itself for IPADIC; otherwise a new 13-element array in which
+        /// the UniDic fields have been mapped onto the IPADIC positions.
+        /// </returns>
+        public virtual string[] FormatEntry(string[] features)
+        {
+            if (this.format == DictionaryBuilder.DictionaryFormat.IPADIC)
+            {
+                return features;
+            }
+            else
+            {
+                string[] features2 = new string[13];
+                features2[0] = features[0];
+                features2[1] = features[1];
+                features2[2] = features[2];
+                features2[3] = features[3];
+                features2[4] = features[4];
+                features2[5] = features[5];
+                features2[6] = features[6];
+                features2[7] = features[7];
+                features2[8] = features[8];
+                features2[9] = features[9];
+                features2[10] = features[11];
+
+                // If the surface reading is non-existent, use surface form for reading and pronunciation.
+                // This happens with punctuation in UniDic and there are possibly other cases as well.
+                // An entry that stops after field 12 is handled the same way instead of throwing,
+                // since BuildDictionary only guarantees 13 fields.
+                if (features.Length <= 13 || features[13].Length == 0)
+                {
+                    features2[11] = features[0];
+                    features2[12] = features[0];
+                }
+                else
+                {
+                    features2[11] = features[13];
+                    features2[12] = features[13];
+                }
+                return features2;
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Tools/TokenInfoDictionaryWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Tools/TokenInfoDictionaryWriter.cs b/src/Lucene.Net.Analysis.Kuromoji/Tools/TokenInfoDictionaryWriter.cs
new file mode 100644
index 0000000..42338f9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Tools/TokenInfoDictionaryWriter.cs
@@ -0,0 +1,51 @@
+using Lucene.Net.Analysis.Ja.Dict;
+using Lucene.Net.Util.Fst;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Dictionary writer for <see cref="TokenInfoDictionary"/> data. On top of what the base
+    /// writer emits, it saves the FST supplied through <see cref="SetFST(FST{long?})"/> to a file.
+    /// </summary>
+    public class TokenInfoDictionaryWriter : BinaryDictionaryWriter
+    {
+        private FST<long?> fst;
+
+        /// <summary>Creates a writer, forwarding <paramref name="size"/> to the base writer.</summary>
+        public TokenInfoDictionaryWriter(int size)
+            : base(typeof(TokenInfoDictionary), size)
+        {
+        }
+
+        /// <summary>Stores the FST that <see cref="Write(string)"/> will save.</summary>
+        public virtual void SetFST(FST<long?> fst)
+        {
+            this.fst = fst;
+        }
+
+        /// <summary>
+        /// Runs the base writer's output first, then saves the FST next to it using the
+        /// base file name plus <see cref="TokenInfoDictionary.FST_FILENAME_SUFFIX"/>.
+        /// </summary>
+        public override void Write(string baseDir)
+        {
+            base.Write(baseDir);
+
+            string fstPath = GetBaseFileName(baseDir) + TokenInfoDictionary.FST_FILENAME_SUFFIX;
+            WriteFST(fstPath);
+        }
+
+        /// <summary>
+        /// Saves the FST to <paramref name="filename"/>, creating the containing directory if it
+        /// does not exist yet.
+        /// </summary>
+        protected virtual void WriteFST(string filename)
+        {
+            FileInfo target = new FileInfo(filename);
+            DirectoryInfo parent = target.Directory;
+            if (!parent.Exists)
+            {
+                parent.Create();
+            }
+            fst.Save(target);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Tools/UnknownDictionaryBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Tools/UnknownDictionaryBuilder.cs b/src/Lucene.Net.Analysis.Kuromoji/Tools/UnknownDictionaryBuilder.cs
new file mode 100644
index 0000000..feff4fe
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Tools/UnknownDictionaryBuilder.cs
@@ -0,0 +1,146 @@
+using Lucene.Net.Analysis.Ja.Dict;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Builds an <see cref="UnknownDictionaryWriter"/> from the <c>unk.def</c> and <c>char.def</c>
+    /// files of a source dictionary directory.
+    /// </summary>
+    public class UnknownDictionaryBuilder
+    {
+        private static readonly string NGRAM_DICTIONARY_ENTRY = "NGRAM,5,5,-32768,記号,一般,*,*,*,*,*,*,*";
+
+        private string encoding = "euc-jp";
+
+        /// <summary>Creates a builder.</summary>
+        /// <param name="encoding">Name of the text encoding used to read the definition files.</param>
+        public UnknownDictionaryBuilder(string encoding)
+        {
+            this.encoding = encoding;
+        }
+
+        /// <summary>
+        /// Reads <c>unk.def</c> and then <c>char.def</c> from <paramref name="dirname"/>.
+        /// </summary>
+        /// <param name="dirname">Directory containing both definition files.</param>
+        /// <returns>The populated dictionary writer.</returns>
+        public virtual UnknownDictionaryWriter Build(string dirname)
+        {
+            UnknownDictionaryWriter unkDictionary = ReadDictionaryFile(System.IO.Path.Combine(dirname, "unk.def"));  //Should be only one file
+            ReadCharacterDefinition(System.IO.Path.Combine(dirname, "char.def"), unkDictionary);
+            return unkDictionary;
+        }
+
+        /// <summary>Reads an unknown-word definition file using this builder's encoding.</summary>
+        public virtual UnknownDictionaryWriter ReadDictionaryFile(string filename)
+        {
+            return ReadDictionaryFile(filename, encoding);
+        }
+
+        /// <summary>
+        /// Reads an unknown-word definition file. The built-in NGRAM entry is written first,
+        /// followed by the file's entries ordered by character class.
+        /// </summary>
+        /// <param name="filename">Path of the definition file.</param>
+        /// <param name="encoding">Name of the text encoding used to read the file.</param>
+        /// <returns>A new writer containing the entries.</returns>
+        public virtual UnknownDictionaryWriter ReadDictionaryFile(string filename, string encoding)
+        {
+            UnknownDictionaryWriter dictionary = new UnknownDictionaryWriter(5 * 1024 * 1024);
+
+            List<string[]> lines = new List<string[]>();
+            Encoding decoder = Encoding.GetEncoding(encoding);
+            using (Stream inputStream = new FileStream(filename, FileMode.Open, FileAccess.Read))
+            using (TextReader reader = new StreamReader(inputStream, decoder))
+            {
+                dictionary.Put(CSVUtil.Parse(NGRAM_DICTIONARY_ENTRY));
+
+                string line = null;
+                while ((line = reader.ReadLine()) != null)
+                {
+                    // note: unk.def only has 10 fields, it simplifies the writer to just append empty reading and pronunciation,
+                    // even though the unknown dictionary returns hardcoded null here.
+                    string[] parsed = CSVUtil.Parse(line + ",*,*"); // Probably we don't need to validate entry
+                    lines.Add(parsed);
+                }
+            }
+
+            // List<T>.Sort is unstable; entries of the same character class must keep their
+            // file order, so sort positions with the original index as tie-breaker.
+            string[][] snapshot = lines.ToArray();
+            int[] order = new int[snapshot.Length];
+            for (int i = 0; i < order.Length; i++)
+            {
+                order[i] = i;
+            }
+            Array.Sort(order, new StableClassComparer(snapshot));
+
+            foreach (int index in order)
+            {
+                dictionary.Put(snapshot[index]);
+            }
+
+            return dictionary;
+        }
+
+        /// <summary>Orders entries by the character class id looked up from their first field.</summary>
+        private class ComparerAnonymousHelper : IComparer<string[]>
+        {
+            public int Compare(string[] left, string[] right)
+            {
+                int leftId = CharacterDefinition.LookupCharacterClass(left[0]);
+                int rightId = CharacterDefinition.LookupCharacterClass(right[0]);
+                return leftId - rightId;
+            }
+        }
+
+        /// <summary>
+        /// Compares positions within an entry array: first by character class, then by position,
+        /// which makes any sort that uses it behave as a stable sort.
+        /// </summary>
+        private class StableClassComparer : IComparer<int>
+        {
+            private readonly string[][] entries;
+            private readonly ComparerAnonymousHelper classComparer = new ComparerAnonymousHelper();
+
+            public StableClassComparer(string[][] entries)
+            {
+                this.entries = entries;
+            }
+
+            public int Compare(int left, int right)
+            {
+                int cmp = classComparer.Compare(entries[left], entries[right]);
+                return cmp != 0 ? cmp : left.CompareTo(right);
+            }
+        }
+
+        /// <summary>
+        /// Reads a character definition file and records its contents in <paramref name="dictionary"/>.
+        /// Lines starting with <c>0x</c> map a code point (or a <c>from..to</c> range) to a character
+        /// class; every other non-empty line is an invoke definition
+        /// (<c>className invoke group length</c>). Text from <c>#</c> to the end of a line is ignored.
+        /// </summary>
+        /// <param name="filename">Path of the character definition file.</param>
+        /// <param name="dictionary">Writer that receives the definitions.</param>
+        public virtual void ReadCharacterDefinition(string filename, UnknownDictionaryWriter dictionary)
+        {
+            using (Stream inputStream = new FileStream(filename, FileMode.Open, FileAccess.Read))
+            using (TextReader reader = new StreamReader(inputStream, Encoding.GetEncoding(encoding)))
+            {
+                string line = null;
+
+                while ((line = reader.ReadLine()) != null)
+                {
+                    line = Regex.Replace(line, "^\\s", "");
+                    line = Regex.Replace(line, "\\s*#.*", "");
+                    line = Regex.Replace(line, "\\s+", " ");
+
+                    // Skip empty line or comment line
+                    if (line.Length == 0)
+                    {
+                        continue;
+                    }
+
+                    if (line.StartsWith("0x", StringComparison.Ordinal))
+                    {  // Category mapping
+                        string[] values = new Regex(" ").Split(line, 2);  // Split only first space
+
+                        // Code points are written as "0x...." hex literals. Convert.ToInt32(string)
+                        // only understands decimal and throws on them; the base-16 overload accepts
+                        // the "0x" prefix.
+                        if (!values[0].Contains(".."))
+                        {
+                            int cp = Convert.ToInt32(values[0], 16);
+                            dictionary.PutCharacterCategory(cp, values[1]);
+                        }
+                        else
+                        {
+                            string[] codePoints = Regex.Split(values[0], "\\.\\.");
+                            int cpFrom = Convert.ToInt32(codePoints[0], 16);
+                            int cpTo = Convert.ToInt32(codePoints[1], 16);
+
+                            for (int i = cpFrom; i <= cpTo; i++)
+                            {
+                                dictionary.PutCharacterCategory(i, values[1]);
+                            }
+                        }
+                    }
+                    else
+                    {  // Invoke definition
+                        string[] values = line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries); // Consecutive space is merged above
+                        string characterClassName = values[0];
+                        int invoke = int.Parse(values[1], CultureInfo.InvariantCulture);
+                        int group = int.Parse(values[2], CultureInfo.InvariantCulture);
+                        int length = int.Parse(values[3], CultureInfo.InvariantCulture);
+                        dictionary.PutInvokeDefinition(characterClassName, invoke, group, length);
+                    }
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Tools/UnknownDictionaryWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Tools/UnknownDictionaryWriter.cs b/src/Lucene.Net.Analysis.Kuromoji/Tools/UnknownDictionaryWriter.cs
new file mode 100644
index 0000000..7d4a982
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Tools/UnknownDictionaryWriter.cs
@@ -0,0 +1,66 @@
+using Lucene.Net.Analysis.Ja.Dict;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Dictionary writer for <see cref="UnknownDictionary"/> data. It also owns a
+    /// <see cref="CharacterDefinitionWriter"/> that is written out together with the dictionary.
+    /// </summary>
+    public class UnknownDictionaryWriter : BinaryDictionaryWriter
+    {
+        private readonly CharacterDefinitionWriter characterDefinition = new CharacterDefinitionWriter();
+
+        /// <summary>Creates a writer, forwarding <paramref name="size"/> to the base writer.</summary>
+        public UnknownDictionaryWriter(int size)
+            : base(typeof(UnknownDictionary), size)
+        {
+        }
+
+        /// <summary>
+        /// Writes <paramref name="entry"/> through the base writer and maps the character class of
+        /// its first field to the buffer position the entry was written at.
+        /// </summary>
+        /// <returns>The value returned by the base writer's <c>Put</c>.</returns>
+        public override int Put(string[] entry)
+        {
+            // The buffer position before writing serves as the word id of this entry
+            int entryWordId = m_buffer.Position;
+
+            int written = base.Put(entry);
+
+            // Register the entry under the character class named by its first field
+            int classId = CharacterDefinition.LookupCharacterClass(entry[0]);
+            AddMapping(classId, entryWordId);
+
+            return written;
+        }
+
+        /// <summary>
+        /// Put mapping from unicode code point to character class.
+        /// </summary>
+        /// <param name="codePoint">Code point.</param>
+        /// <param name="characterClassName">Character class name.</param>
+        public virtual void PutCharacterCategory(int codePoint, string characterClassName)
+        {
+            characterDefinition.PutCharacterCategory(codePoint, characterClassName);
+        }
+
+        /// <summary>
+        /// Forwards an invoke definition for the given character class to the character
+        /// definition writer.
+        /// </summary>
+        public virtual void PutInvokeDefinition(string characterClassName, int invoke, int group, int length)
+        {
+            characterDefinition.PutInvokeDefinition(characterClassName, invoke, group, length);
+        }
+
+        /// <summary>
+        /// Writes the dictionary via the base writer, then the character definition, both
+        /// under <paramref name="baseDir"/>.
+        /// </summary>
+        public override void Write(string baseDir)
+        {
+            base.Write(baseDir);
+            characterDefinition.Write(baseDir);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Analysis.Kuromoji/Util/CSVUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Util/CSVUtil.cs b/src/Lucene.Net.Analysis.Kuromoji/Util/CSVUtil.cs
new file mode 100644
index 0000000..9d86aed
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Kuromoji/Util/CSVUtil.cs
@@ -0,0 +1,124 @@
+using System.Collections.Generic;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Utility class for parsing CSV text
+    /// </summary>
+    public sealed class CSVUtil
+    {
+        private static readonly char QUOTE = '"';
+
+        private static readonly char COMMA = ',';
+
+        // Matches a value that is wrapped in quotes and contains no further quote characters
+        private static readonly Regex QUOTE_REPLACE_PATTERN = new Regex("^\"([^\"]+)\"$", RegexOptions.Compiled);
+
+        private static readonly string ESCAPED_QUOTE = "\"\"";
+
+        private CSVUtil() { } // no instance!!!
+
+        /// <summary>
+        /// Parse CSV line
+        /// </summary>
+        /// <param name="line">line containing csv-encoded data</param>
+        /// <returns>
+        /// Array of values, or an empty array if the line contains an odd number of quote characters.
+        /// </returns>
+        public static string[] Parse(string line)
+        {
+            bool insideQuote = false;
+            List<string> result = new List<string>();
+            int quoteCount = 0;
+            StringBuilder sb = new StringBuilder();
+            for (int i = 0; i < line.Length; i++)
+            {
+                char c = line[i];
+
+                if (c == QUOTE)
+                {
+                    insideQuote = !insideQuote;
+                    quoteCount++;
+                }
+
+                // A comma only ends the current value when it is not inside a quoted section
+                if (c == COMMA && !insideQuote)
+                {
+                    string value = sb.ToString();
+                    value = UnQuoteUnEscape(value);
+                    result.Add(value);
+                    sb.Length = 0;
+                    continue;
+                }
+
+                sb.Append(c);
+            }
+
+            // NOTE: the trailing value is appended as-is, without unquoting or unescaping.
+            result.Add(sb.ToString());
+
+            // Validate
+            if (quoteCount % 2 != 0)
+            {
+                return new string[0];
+            }
+
+            return result.ToArray(/*new String[result.size()]*/);
+        }
+
+        /// <summary>
+        /// Strips the surrounding quotes from a value (when it matches
+        /// <see cref="QUOTE_REPLACE_PATTERN"/>) and turns doubled quotes back into single quotes.
+        /// Values without any quote character are returned unchanged.
+        /// </summary>
+        private static string UnQuoteUnEscape(string original)
+        {
+            string result = original;
+
+            // Unquote
+            if (result.IndexOf('\"') >= 0)
+            {
+                Match m = QUOTE_REPLACE_PATTERN.Match(original);
+                if (m.Success)
+                {
+                    result = m.Groups[1].Value;
+                }
+
+                // Unescape. string.Replace compares ordinally and leaves the string untouched
+                // when there is nothing to replace, so no prior search is needed.
+                result = result.Replace(ESCAPED_QUOTE, "\"");
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Quote and escape input value for CSV
+        /// </summary>
+        /// <param name="original">Raw value.</param>
+        /// <returns>
+        /// The value with every quote character doubled and, if it contains a comma,
+        /// wrapped in quotes.
+        /// </returns>
+        public static string QuoteEscape(string original)
+        {
+            string result = original;
+
+            if (result.IndexOf('\"') >= 0)
+            {
+                // Strings are immutable: the escaped text is the return value of Replace
+                // and has to be assigned, otherwise the quotes are never escaped.
+                result = result.Replace("\"", ESCAPED_QUOTE);
+            }
+            if (result.IndexOf(COMMA) >= 0)
+            {
+                result = "\"" + result + "\"";
+            }
+            return result;
+        }
+    }
+}