You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@lucenenet.apache.org by sy...@apache.org on 2016/12/14 20:00:47 UTC

[1/4] lucenenet git commit: Renamed hyphenation to Hyphenation to fix build and run on case sensitive file systems

Repository: lucenenet
Updated Branches:
  refs/heads/master e75dbf614 -> 7214c8a3c


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs
deleted file mode 100644
index 88cfd01..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs
+++ /dev/null
@@ -1,816 +0,0 @@
-\ufeffusing System;
-using System.Collections;
-using System.Collections.Generic;
-using System.IO;
-using System.Text;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// <h2>Ternary Search Tree.</h2>
-    /// 
-    /// <para>
-    /// A ternary search tree is a hybrid between a binary tree and a digital search
-    /// tree (trie). Keys are limited to strings. A data value of type char is stored
-    /// in each leaf node. It can be used as an index (or pointer) to the data.
-    /// Branches that only contain one key are compressed to one node by storing a
-    /// pointer to the trailer substring of the key. This class is intended to serve
-    /// as base class or helper class to implement Dictionary collections or the
-    /// like. Ternary trees have some nice properties as the following: the tree can
-    /// be traversed in sorted order, partial matches (wildcard) can be implemented,
-    /// retrieval of all keys within a given distance from the target, etc. The
-    /// storage requirements are higher than a binary tree but a lot less than a
-    /// trie. Performance is comparable with a hash table, sometimes it outperforms a
-    /// hash function (most of the time can determine a miss faster than a hash).
-    /// </para>
-    /// 
-    /// <para>
-    /// The main purpose of this java port is to serve as a base for implementing
-    /// TeX's hyphenation algorithm (see The TeXBook, appendix H). Each language
-    /// requires from 5000 to 15000 hyphenation patterns which will be keys in this
-    /// tree. The strings patterns are usually small (from 2 to 5 characters), but
-    /// each char in the tree is stored in a node. Thus memory usage is the main
-    /// concern. We will sacrifice 'elegance' to keep memory requirements to the
-    /// minimum. Using java's char type as pointer (yes, I know pointer it is a
-    /// forbidden word in java) we can keep the size of the node to be just 8 bytes
-    /// (3 pointers and the data char). This gives room for about 65000 nodes. In my
-    /// tests the english patterns took 7694 nodes and the german patterns 10055
-    /// nodes, so I think we are safe.
-    /// </para>
-    /// 
-    /// <para>
-    /// All said, this is a map with strings as keys and char as value. Pretty
-    /// limited!. It can be extended to a general map by using the string
-    /// representation of an object and using the char value as an index to an array
-    /// that contains the object values.
-    /// </para>
-    /// 
-    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). It has been slightly modified. 
-    /// </summary>
-
-    public class TernaryTree : ICloneable
-    {
-        // NOTE: design remark covering the four node arrays below (lo, hi, eq, sc); kept as a plain comment so it is not attached to 'lo'.
-        // We use 4 arrays to represent a node. I guess I should have created a proper
-        // node class, but somehow Knuth's pascal code made me forget we now have a
-        // portable language with virtual memory management and automatic garbage
-        // collection! And now is kind of late, furthermore, if it ain't broken, don't
-        // fix it.
-        // Node index 0 is never handed out (Init() starts freenode at 1), so a link value of 0 means "no node".
-
-        /// <summary>
-        /// Index of the low branch. For a compressed node (sc == 0xFFFF) it instead holds the offset in
-        /// <see cref="kv"/> of the rest of the key (one array doing double duty, as the original Java code had no unions).
-        /// </summary>
-        protected internal char[] lo;
-
-        /// <summary>
-        /// Index of the high branch.
-        /// </summary>
-        protected internal char[] hi;
-
-        /// <summary>
-        /// Index of the equal branch, or the data value when this node is a string terminator or a compressed node.
-        /// </summary>
-        protected internal char[] eq;
-
-        /// <summary>
-        /// <para>
-        /// The character stored in this node: splitchar. Two special values are
-        /// reserved:
-        /// </para>
-        /// <ul>
-        /// <li>0x0000 as string terminator</li>
-        /// <li>0xFFFF to indicate that the branch starting at this node is compressed</li>
-        /// </ul>
-        /// <para>
-        /// This shouldn't be a problem if we give the usual semantics to strings since
-        /// 0xFFFF is guaranteed not to be a Unicode character.
-        /// </para>
-        /// </summary>
-        protected internal char[] sc;
-
-        /// <summary>
-        /// This vector holds the trailing part of the keys when the branch is compressed.
-        /// </summary>
-        protected internal CharVector kv;
-
-        protected internal char root; // index of the root node; 0 while the tree is empty
-
-        protected internal char freenode; // index of the next unused slot in the node arrays
-
-        protected internal int length; // number of items in tree
-
-        protected internal const int BLOCK_SIZE = 2048; // initial size and growth increment of the node arrays
-
-        /// <summary>
-        /// Creates an empty tree; all state is set up by <see cref="Init"/>.
-        /// </summary>
-        internal TernaryTree()
-        {
-            Init();
-        }
-
-        /// <summary>
-        /// Puts the tree into its empty state: fresh node arrays of <see cref="BLOCK_SIZE"/>
-        /// entries each, a new key vector, no root and no items.
-        /// </summary>
-        protected internal virtual void Init()
-        {
-            // counters and links first
-            length = 0;
-            root = '\0';
-            freenode = '\u0001'; // slot 0 stays unused so that 0 can mean "no node"
-
-            // then the storage itself
-            sc = new char[BLOCK_SIZE];
-            eq = new char[BLOCK_SIZE];
-            hi = new char[BLOCK_SIZE];
-            lo = new char[BLOCK_SIZE];
-            kv = new CharVector();
-        }
-
-        /// <summary>
-        /// Inserts <paramref name="key"/> with the data value <paramref name="val"/>; if the key is
-        /// already in the tree its data value is overwritten.
-        /// <para>
-        /// Branches are initially compressed, needing one node per key plus the size
-        /// of the string key. They are decompressed as needed when another key with
-        /// same prefix is inserted. This saves a lot of space, specially for long
-        /// keys.
-        /// </para>
-        /// </summary>
-        /// <param name="key">The key to insert.</param>
-        /// <param name="val">The data value to associate with the key.</param>
-        public virtual void Insert(string key, char val)
-        {
-            int keyLength = key.Length;
-
-            // make sure we have enough room in the arrays:
-            // keyLength + 1 is the maximum number of nodes that may be generated
-            EnsureNodeCapacity(keyLength + 1);
-
-            // the recursive insertion works on a null terminated char array
-            char[] strkey = new char[keyLength + 1];
-            key.CopyTo(0, strkey, 0, keyLength);
-            strkey[keyLength] = (char)0;
-            root = Insert(root, strkey, 0, val);
-        }
-
-        /// <summary>
-        /// Inserts the null terminated key that begins at index <paramref name="start"/> of
-        /// <paramref name="key"/> with the data value <paramref name="val"/>.
-        /// </summary>
-        /// <param name="key">Array containing the null terminated key.</param>
-        /// <param name="start">Index of the first character of the key.</param>
-        /// <param name="val">The data value to associate with the key.</param>
-        public virtual void Insert(char[] key, int start, char val)
-        {
-            // Measure the key from 'start': only that part is inserted. Measuring from index 0
-            // reserved too little room whenever a 0 precedes 'start' in the array.
-            EnsureNodeCapacity(StrLen(key, start) + 1);
-            root = Insert(root, key, start, val);
-        }
-
-        /// <summary>
-        /// Grows the node arrays when fewer than <paramref name="nodesNeeded"/> free slots remain.
-        /// The arrays grow by at least <see cref="BLOCK_SIZE"/>, and by more when one block is not enough.
-        /// </summary>
-        private void EnsureNodeCapacity(int nodesNeeded)
-        {
-            int required = freenode + nodesNeeded;
-            if (required > eq.Length)
-            {
-                RedimNodeArrays(Math.Max(eq.Length + BLOCK_SIZE, required));
-            }
-        }
-
-        /// <summary>
-        /// The actual insertion function, recursive version: inserts the null terminated key starting at <paramref name="start"/> into the subtree rooted at node <paramref name="p"/> (0 = no node) and returns the index of that subtree's root.
-        /// </summary>
-        private char Insert(char p, char[] key, int start, char val)
-        {
-            int len = StrLen(key, start); // characters left in the key from 'start', terminator excluded
-            if (p == 0)
-            {
-                // this means there is no branch, this node will start a new branch.
-                // Instead of doing that, we store the key somewhere else and create
-                // only one node with a pointer to the key
-                p = freenode++;
-                eq[p] = val; // holds data
-                length++;
-                hi[p] = (char)0;
-                if (len > 0)
-                {
-                    sc[p] = (char)0xFFFF; // indicates branch is compressed
-                    lo[p] = (char)kv.Alloc(len + 1); // use 'lo' to hold pointer to key
-                    StrCpy(kv.Array, lo[p], key, start);
-                }
-                else
-                {
-                    sc[p] = (char)0; // nothing left of the key: plain string terminator node
-                    lo[p] = (char)0;
-                }
-                return p;
-            }
-
-            if (sc[p] == 0xFFFF)
-            {
-                // branch is compressed: need to decompress
-                // this will generate garbage in the external key array
-                // but we can do some garbage collection later
-                char pp = freenode++;
-                lo[pp] = lo[p]; // previous pointer to key
-                eq[pp] = eq[p]; // previous pointer to data
-                lo[p] = (char)0;
-                if (len > 0)
-                {
-                    sc[p] = kv[lo[pp]]; // p now holds the first character of the stored key
-                    eq[p] = pp; // and the rest of that key continues in the new node pp
-                    lo[pp]++;
-                    if (kv[lo[pp]] == 0)
-                    {
-                        // key completely decompressed leaving garbage in key array
-                        lo[pp] = (char)0;
-                        sc[pp] = (char)0;
-                        hi[pp] = (char)0;
-                    }
-                    else
-                    {
-                        // we only got first char of key, rest is still there
-                        sc[pp] = (char)0xFFFF;
-                    }
-                }
-                else
-                {
-                    // In this case we can save a node by swapping the new node
-                    // with the compressed node
-                    sc[pp] = (char)0xFFFF;
-                    hi[p] = pp; // the old compressed branch hangs off the high link of the new terminator
-                    sc[p] = (char)0;
-                    eq[p] = val;
-                    length++;
-                    return p;
-                }
-            }
-            char s = key[start]; // current key character (0 when the key is exhausted)
-            if (s < sc[p])
-            {
-                lo[p] = Insert(lo[p], key, start, val);
-            }
-            else if (s == sc[p])
-            {
-                if (s != 0)
-                {
-                    eq[p] = Insert(eq[p], key, start + 1, val);
-                }
-                else
-                {
-                    // key already in tree, overwrite data
-                    eq[p] = val;
-                }
-            }
-            else
-            {
-                hi[p] = Insert(hi[p], key, start, val);
-            }
-            return p;
-        }
-
-        /// <summary>
-        /// Compares two null terminated char arrays, starting at <paramref name="startA"/> and
-        /// <paramref name="startB"/> respectively.
-        /// </summary>
-        /// <returns>
-        /// 0 when both sequences match up to and including the terminator; otherwise the
-        /// difference between the first pair of characters that differ.
-        /// </returns>
-        public static int StrCmp(char[] a, int startA, char[] b, int startB)
-        {
-            int posA = startA;
-            int posB = startB;
-            while (a[posA] == b[posB])
-            {
-                if (a[posA] == 0)
-                {
-                    // both terminators reached at the same time
-                    return 0;
-                }
-                posA++;
-                posB++;
-            }
-            return a[posA] - b[posB];
-        }
-
-        /// <summary>
-        /// Compares a string with the null terminated char sequence that starts at
-        /// <paramref name="start"/> in <paramref name="a"/>.
-        /// </summary>
-        /// <returns>
-        /// The difference between the first pair of differing characters; when the whole string
-        /// matches, the negated next array character if the array sequence is longer, else 0.
-        /// </returns>
-        public static int StrCmp(string str, char[] a, int start)
-        {
-            int count = str.Length;
-            for (int offset = 0; offset < count; offset++)
-            {
-                char other = a[start + offset];
-                int diff = str[offset] - other;
-                if (diff != 0 || other == 0)
-                {
-                    return diff;
-                }
-            }
-
-            // the string is exhausted: equal only if the array sequence ends here too
-            char next = a[start + count];
-            return next != 0 ? -next : 0;
-        }
-
-        /// <summary>
-        /// Copies the null terminated sequence starting at <paramref name="si"/> in
-        /// <paramref name="src"/> to <paramref name="dst"/> starting at <paramref name="di"/>,
-        /// and writes a terminator after the copied characters.
-        /// </summary>
-        public static void StrCpy(char[] dst, int di, char[] src, int si)
-        {
-            int target = di;
-            for (int source = si; src[source] != 0; source++)
-            {
-                dst[target] = src[source];
-                target++;
-            }
-            dst[target] = '\0';
-        }
-
-        /// <summary>
-        /// Counts the characters from index <paramref name="start"/> up to, but not including,
-        /// the first 0 character or the end of the array, whichever comes first.
-        /// </summary>
-        public static int StrLen(char[] a, int start)
-        {
-            int end = start;
-            while (end < a.Length && a[end] != 0)
-            {
-                end++;
-            }
-            return end - start;
-        }
-
-        /// <summary>
-        /// Same as <see cref="StrLen(char[], int)"/> measured from the beginning of the array.
-        /// </summary>
-        public static int StrLen(char[] a)
-        {
-            const int fromBeginning = 0;
-            return StrLen(a, fromBeginning);
-        }
-
-        /// <summary>
-        /// Looks up <paramref name="key"/> by converting it to a null terminated char array
-        /// and delegating to <see cref="Find(char[], int)"/>.
-        /// </summary>
-        /// <returns>The result of <see cref="Find(char[], int)"/> for the converted key.</returns>
-        public virtual int Find(string key)
-        {
-            int keyLength = key.Length;
-
-            // one extra slot for the terminator; new arrays are zero-filled
-            char[] terminated = new char[keyLength + 1];
-            for (int i = 0; i < keyLength; i++)
-            {
-                terminated[i] = key[i];
-            }
-            terminated[keyLength] = '\0';
-
-            return Find(terminated, 0);
-        }
-
-        /// <summary>
-        /// Looks up the null terminated key that starts at index <paramref name="start"/> of
-        /// <paramref name="key"/>.
-        /// </summary>
-        /// <returns>The data value stored for the key, or -1 if the key is not in the tree.</returns>
-        public virtual int Find(char[] key, int start)
-        {
-            char node = root;
-            int pos = start;
-
-            while (node != 0)
-            {
-                char split = sc[node];
-                if (split == 0xFFFF)
-                {
-                    // compressed branch: the rest of the key must match the stored remainder exactly
-                    bool matches = StrCmp(key, pos, kv.Array, lo[node]) == 0;
-                    return matches ? (int)eq[node] : -1;
-                }
-
-                char c = key[pos];
-                if (c < split)
-                {
-                    node = lo[node];
-                }
-                else if (c > split)
-                {
-                    node = hi[node];
-                }
-                else
-                {
-                    if (c == 0)
-                    {
-                        // terminator matched: eq holds the data value
-                        return eq[node];
-                    }
-                    pos++;
-                    node = eq[node];
-                }
-            }
-            return -1;
-        }
-
-        /// <summary>
-        /// Tells whether <paramref name="key"/> is in the tree, i.e. whether
-        /// <see cref="Find(string)"/> returns a non-negative value for it.
-        /// </summary>
-        public virtual bool Knows(string key)
-        {
-            int found = Find(key);
-            return found >= 0;
-        }
-
-        /// <summary>
-        /// Replaces the four node arrays with new arrays of <paramref name="newsize"/> entries,
-        /// keeping as many leading entries as fit (measured against the current length of <c>lo</c>).
-        /// </summary>
-        private void RedimNodeArrays(int newsize)
-        {
-            int keep = Math.Min(newsize, lo.Length);
-            lo = CopyToNewArray(lo, keep, newsize);
-            hi = CopyToNewArray(hi, keep, newsize);
-            eq = CopyToNewArray(eq, keep, newsize);
-            sc = CopyToNewArray(sc, keep, newsize);
-        }
-
-        /// <summary>
-        /// Returns a new array of <paramref name="newsize"/> entries whose first
-        /// <paramref name="count"/> entries are copied from <paramref name="source"/>.
-        /// </summary>
-        private static char[] CopyToNewArray(char[] source, int count, int newsize)
-        {
-            char[] target = new char[newsize];
-            Array.Copy(source, 0, target, 0, count);
-            return target;
-        }
-
-        /// <summary>
-        /// Gets the number of items in the tree.
-        /// </summary>
-        public virtual int Length
-        {
-            get
-            {
-                return this.length;
-            }
-        }
-
-        /// <summary>
-        /// Creates a new <see cref="TernaryTree"/> holding copies of this tree's node arrays
-        /// and key vector together with the same root, free node index and item count.
-        /// </summary>
-        public object Clone()
-        {
-            TernaryTree copy = new TernaryTree();
-
-            // node arrays and key vector are cloned
-            copy.lo = (char[])lo.Clone();
-            copy.hi = (char[])hi.Clone();
-            copy.eq = (char[])eq.Clone();
-            copy.sc = (char[])sc.Clone();
-            copy.kv = (CharVector)kv.Clone();
-
-            // scalar state is copied as is
-            copy.root = root;
-            copy.freenode = freenode;
-            copy.length = length;
-            return copy;
-        }
-
-        /// <summary>
-        /// Inserts the <paramref name="n"/> keys starting at <paramref name="offset"/> so that the
-        /// tree comes out balanced: the median goes in first, then the lower half and the upper
-        /// half are handled the same way. The keys are assumed to be sorted in ascending order.
-        /// </summary>
-        /// <param name="k">The keys.</param>
-        /// <param name="v">The data values; <c>v[i]</c> belongs to <c>k[i]</c>.</param>
-        /// <param name="offset">Index of the first entry of the range.</param>
-        /// <param name="n">Number of entries in the range.</param>
-        protected internal virtual void InsertBalanced(string[] k, char[] v, int offset, int n)
-        {
-            if (n >= 1)
-            {
-                int half = n >> 1;
-                int median = offset + half;
-
-                Insert(k[median], v[median]);
-                InsertBalanced(k, v, offset, half);
-                InsertBalanced(k, v, median + 1, n - half - 1);
-            }
-        }
-
-        /// <summary>
-        /// Balances the tree for best search performance: collects every key and its value,
-        /// re-initializes the tree and re-inserts the entries with <see cref="InsertBalanced"/>.
-        /// </summary>
-        public virtual void Balance()
-        {
-            int count = length;
-            string[] keys = new string[count];
-            char[] values = new char[count];
-
-            // snapshot the current content
-            Iterator entries = new Iterator(this);
-            for (int slot = 0; entries.MoveNext(); slot++)
-            {
-                values[slot] = entries.Value;
-                keys[slot] = entries.Current;
-            }
-
-            // rebuild from scratch; with uniform letter distribution
-            // sc[root] should end up around 'm'
-            Init();
-            InsertBalanced(keys, values, 0, count);
-        }
-
-        /// <summary>
-        /// Balances the tree, shrinks the node arrays to the nodes in use and compacts the key vector.
-        /// <para>
-        /// Each node stores a character (splitchar) which is part of some key(s). In a
-        /// compressed branch (one that only contain a single string key) the trailer
-        /// of the key which is not already in nodes is stored externally in the kv
-        /// array. As items are inserted, key substrings decrease. Some substrings may
-        /// completely disappear when the whole branch is totally decompressed. The
-        /// tree is traversed to find the key substrings actually used. In addition,
-        /// duplicate substrings are removed using a map (implemented with a
-        /// TernaryTree!).
-        /// </para>
-        /// </summary>
-        public virtual void TrimToSize()
-        {
-            // balance first, for best search performance
-            Balance();
-
-            // cut the node arrays down to the slots handed out so far
-            RedimNodeArrays(freenode);
-
-            // copy only the key remainders still referenced into a fresh vector
-            CharVector compacted = new CharVector();
-            compacted.Alloc(1); // NOTE(review): presumably reserves offset 0 so no remainder is stored there - confirm against CharVector
-            TernaryTree knownRemainders = new TernaryTree();
-            Compact(compacted, knownRemainders, root);
-
-            kv = compacted;
-            kv.TrimToSize();
-        }
-
-        /// <summary>
-        /// Walks the subtree rooted at <paramref name="p"/> and repoints every compressed node
-        /// to a copy of its key remainder in <paramref name="kx"/>. <paramref name="map"/>
-        /// remembers the remainders already copied so that equal remainders share one copy.
-        /// </summary>
-        private void Compact(CharVector kx, TernaryTree map, char p)
-        {
-            if (p == 0)
-            {
-                return;
-            }
-
-            if (sc[p] != 0xFFFF)
-            {
-                // regular node: visit the branches; eq is data, not a link, on a terminator
-                Compact(kx, map, lo[p]);
-                if (sc[p] != 0)
-                {
-                    Compact(kx, map, eq[p]);
-                }
-                Compact(kx, map, hi[p]);
-                return;
-            }
-
-            // compressed node: reuse the copy of an equal remainder or make a new one
-            int offset = map.Find(kv.Array, lo[p]);
-            if (offset < 0)
-            {
-                offset = kx.Alloc(StrLen(kv.Array, lo[p]) + 1);
-                StrCpy(kx.Array, offset, kv.Array, lo[p]);
-                map.Insert(kx.Array, offset, (char)offset);
-            }
-            lo[p] = (char)offset;
-        }
-
-        /// <summary>
-        /// Returns a new enumerator over the keys of this tree.
-        /// </summary>
-        public virtual IEnumerator<string> Keys()
-        {
-            Iterator keyEnumerator = new Iterator(this);
-            return keyEnumerator;
-        }
-
-        /// <summary>
-        /// Enumerator over the keys of a TernaryTree; <see cref="Value"/> gives the data stored for the current key.
-        /// 
-        /// LUCENENET NOTE: This differs a bit from its Java counterpart to adhere to
-        /// .NET IEnumerator semantics. In Java, when the <see cref="Iterator"/> is
-        /// instantiated, it is already positioned at the first element. However,
-        /// to act like a .NET IEnumerator, the initial state is undefined and considered
-        /// to be before the first element until <see cref="MoveNext"/> is called, and
-        /// if a move took place it will return <c>true</c>;
-        /// </summary>
-        public class Iterator : IEnumerator<string>
-        {
-            private readonly TernaryTree outerInstance; // the tree being enumerated
-
-
-            /// <summary>
-            /// current node index (-1 before the first <see cref="MoveNext"/> and once the traversal is exhausted)
-            /// </summary>
-            private int cur;
-
-            /// <summary>
-            /// current key, as assembled by the last <see cref="Run"/> that found a data node
-            /// </summary>
-            private string curkey;
-
-            internal class Item : ICloneable
-            {
-                internal char parent; // index of the visited node
-                internal char child; // branch taken last: 0 = low, 1 = equal, 2 = high (see Up())
-
-                public Item()
-                {
-                    parent = (char)0;
-                    child = (char)0;
-                }
-
-                public Item(char p, char c)
-                {
-                    parent = p;
-                    child = c;
-                }
-
-                public object Clone()
-                {
-                    return new Item(parent, child);
-                }
-
-            }
-
-            /// <summary>
-            /// Node stack: the nodes visited on the way down to the current node
-            /// </summary>
-            internal Stack<Item> ns;
-
-            /// <summary>
-            /// key stack implemented with a StringBuilder: the split characters of the equal branches taken so far
-            /// </summary>
-            internal StringBuilder ks;
-
-            private bool isInitialized = false; // false until the first MoveNext() has positioned the enumerator
-
-            public Iterator(TernaryTree outerInstance)
-            {
-                this.outerInstance = outerInstance;
-                cur = -1; // before the first element
-                ns = new Stack<Item>();
-                ks = new StringBuilder();
-                isInitialized = false;
-            }
-
-            public virtual void Rewind()
-            {
-                ns.Clear();
-                ks.Length = 0;
-                cur = outerInstance.root; // restart at the root ...
-                Run(); // ... and advance to the first key, if any
-            }
-
-            public virtual char Value
-            {
-                get
-                {
-                    if (cur >= 0)
-                    {
-                        return outerInstance.eq[cur]; // eq holds the data of the current node
-                    }
-                    return (char)0; // not positioned on a key
-                }
-            }
-
-            /// <summary>
-            /// traverse upwards: returns the index of the next node to descend from (may be 0), or -1 when nothing is left
-            /// </summary>
-            internal virtual int Up()
-            {
-                Item i = new Item();
-                int res = 0;
-
-                if (ns.Count == 0)
-                {
-                    return -1; // no visited node left to climb to
-                }
-
-                if (cur != 0 && outerInstance.sc[cur] == 0)
-                {
-                    return outerInstance.lo[cur]; // current node is a string terminator: resume with its low branch
-                }
-
-                bool climb = true;
-
-                while (climb)
-                {
-                    i = ns.Pop();
-                    i.child++; // move on to the next branch of this node
-                    switch ((int)i.child)
-                    {
-                        case 1:
-                            if (outerInstance.sc[i.parent] != 0)
-                            {
-                                res = outerInstance.eq[i.parent]; // low branch done: take the equal branch
-                                ns.Push((Item)i.Clone());
-                                ks.Append(outerInstance.sc[i.parent]); // its split character becomes part of the key
-                            }
-                            else
-                            {
-                                i.child++; // terminator: eq holds data, so skip straight to the high branch
-                                ns.Push((Item)i.Clone());
-                                res = outerInstance.hi[i.parent];
-                            }
-                            climb = false;
-                            break;
-
-                        case 2:
-                            res = outerInstance.hi[i.parent]; // equal branch done: take the high branch
-                            ns.Push((Item)i.Clone());
-                            if (ks.Length > 0)
-                            {
-                                ks.Length = ks.Length - 1; // pop
-                            }
-                            climb = false;
-                            break;
-
-                        default:
-                            if (ns.Count == 0)
-                            {
-                                return -1; // all branches of the last stacked node are done
-                            }
-                            climb = true; // all branches of this node are done: keep climbing
-                            break;
-                    }
-                }
-                return res;
-            }
-
-            /// <summary>
-            /// traverse the tree to find next key: returns 0 and sets the current key when a data node is reached, -1 otherwise
-            /// </summary>
-            internal virtual int Run()
-            {
-                if (cur == -1)
-                {
-                    return -1; // not positioned, or traversal already exhausted
-                }
-
-                bool leaf = false;
-                while (true)
-                {
-                    // first go down on low branch until leaf or compressed branch
-                    while (cur != 0)
-                    {
-                        if (outerInstance.sc[cur] == 0xFFFF)
-                        {
-                            leaf = true; // compressed node: a data node that is not pushed on the node stack
-                            break;
-                        }
-                        ns.Push(new Item((char)cur, '\u0000'));
-                        if (outerInstance.sc[cur] == 0)
-                        {
-                            leaf = true; // string terminator: a data node
-                            break;
-                        }
-                        cur = outerInstance.lo[cur];
-                    }
-                    if (leaf)
-                    {
-                        break;
-                    }
-                    // nothing found, go up one node and try again
-                    cur = Up();
-                    if (cur == -1)
-                    {
-                        return -1;
-                    }
-                }
-                // The current node should be a data node and
-                // the key should be in the key stack (at least partially)
-                StringBuilder buf = new StringBuilder(ks.ToString());
-                if (outerInstance.sc[cur] == 0xFFFF)
-                {
-                    int p = outerInstance.lo[cur]; // offset of the key remainder in the key vector
-                    while (outerInstance.kv[p] != 0)
-                    {
-                        buf.Append(outerInstance.kv[p++]);
-                    }
-                }
-                curkey = buf.ToString();
-                return 0;
-            }
-
-            #region Added for better .NET support
-            public string Current
-            {
-                get
-                {
-                    return curkey; // keeps the last key found, even after MoveNext() has returned false
-                }
-            }
-
-            object IEnumerator.Current
-            {
-                get
-                {
-                    return Current;
-                }
-            }
-
-            public void Dispose()
-            {
-                // nothing to do
-            }
-
-            public bool MoveNext()
-            {
-                if (!isInitialized)
-                {
-                    Rewind(); // first call: position on the first key
-                    isInitialized = true;
-                    return cur != -1;
-                }
-                if (cur == -1)
-                {
-                    return false; // already past the last key
-                }
-                cur = Up(); // leave the current data node ...
-                Run(); // ... and look for the next one
-                return cur != -1;
-            }
-
-            public void Reset()
-            {
-                throw new NotSupportedException(); // restarting is not supported; create a new Iterator instead
-            }
-
-            #endregion
-        }
-
-        /// <summary>
-        /// Writes the number of keys, the node count and the key array length to
-        /// <paramref name="out"/>, one line each.
-        /// </summary>
-        /// <param name="out">The writer that receives the statistics.</param>
-        public virtual void PrintStats(TextWriter @out)
-        {
-            @out.WriteLine("Number of keys = " + Convert.ToString(length));
-            // freenode is a char used as an index: convert it as a number, otherwise the
-            // Convert.ToString(char) overload prints the character with that code instead
-            @out.WriteLine("Node count = " + Convert.ToString((int)freenode));
-            @out.WriteLine("Key Array length = " + Convert.ToString(kv.Length()));
-        }
-        /*
-          public static void main(String[] args) {
-            TernaryTree tt = new TernaryTree();
-            tt.insert("Carlos", 'C');
-            tt.insert("Car", 'r');
-            tt.insert("palos", 'l');
-            tt.insert("pa", 'p');
-            tt.trimToSize();
-            System.out.println((char) tt.find("Car"));
-            System.out.println((char) tt.find("Carlos"));
-            System.out.println((char) tt.find("alto"));
-            tt.printStats(System.out);
-          }
-          */
-
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/hyphenation.dtd
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/hyphenation.dtd b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/hyphenation.dtd
deleted file mode 100644
index 083c2bd..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/hyphenation.dtd
+++ /dev/null
@@ -1,68 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!--
-  Copyright 1999-2004 The Apache Software Foundation
-
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<!-- $Id: hyphenation.dtd,v 1.3 2004/02/27 18:34:59 jeremias Exp $ -->
-
-<!ELEMENT hyphenation-info (hyphen-char?, hyphen-min?,
-                           classes, exceptions?, patterns)>
-
-<!-- Hyphen character to be used in the exception list as shortcut for
-     <hyphen pre-break="-"/>. Defaults to '-'
--->
-<!ELEMENT hyphen-char EMPTY>
-<!ATTLIST hyphen-char value CDATA #REQUIRED>
-
-<!-- Default minimum length in characters of hyphenated word fragments
-     before and after the line break. For some languages this is not
-     only for aesthetic purposes, wrong hyphens may be generated if this
-     is not accounted for.
--->
-<!ELEMENT hyphen-min EMPTY>
-<!ATTLIST hyphen-min before CDATA #REQUIRED>
-<!ATTLIST hyphen-min after CDATA #REQUIRED>
-
-<!-- Character equivalent classes: space separated list of character groups, all
-     characters in a group are to be treated equivalent as far as
-     the hyphenation algorithm is concerned. The first character in a group
-     is the group's equivalent character. Patterns should only contain
-     first characters. It also defines word characters, i.e. a word that
-     contains characters not present in any of the classes is not hyphenated.
--->
-<!ELEMENT classes (#PCDATA)>
-
-<!-- Hyphenation exceptions: space separated list of hyphenated words.
-     A hyphen is indicated by the hyphen tag, but you can use the
-     hyphen-char defined previously as shortcut. This is in cases
-     when the algorithm procedure finds wrong hyphens or you want
-     to provide your own hyphenation for some words.
--->
-<!ELEMENT exceptions (#PCDATA|hyphen)* >
-
-<!-- The hyphenation patterns, space separated. A pattern is made of 'equivalent'
-     characters as described before, between any two word characters a digit
-     in the range 0 to 9 may be specified. The absence of a digit is equivalent
-     to zero. The '.' character is reserved to indicate beginning or ending
-     of words. -->
-<!ELEMENT patterns (#PCDATA)>
-
-<!-- A "full hyphen" equivalent to TeX's \discretionary
-     with pre-break, post-break and no-break attributes.
-     To be used in the exceptions list, the hyphen character is not
-     automatically added -->
-<!ELEMENT hyphen EMPTY>
-<!ATTLIST hyphen pre CDATA #IMPLIED>
-<!ATTLIST hyphen no CDATA #IMPLIED>
-<!ATTLIST hyphen post CDATA #IMPLIED>

[2/4] lucenenet git commit: Renamed hyphenation to Hyphenation to fix build and run on case sensitive file systems

Posted by sy...@apache.org.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/hyphenation.dtd
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/hyphenation.dtd b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/hyphenation.dtd
new file mode 100644
index 0000000..083c2bd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/hyphenation.dtd
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  Copyright 1999-2004 The Apache Software Foundation
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!-- $Id: hyphenation.dtd,v 1.3 2004/02/27 18:34:59 jeremias Exp $ -->
+
+<!ELEMENT hyphenation-info (hyphen-char?, hyphen-min?,
+                           classes, exceptions?, patterns)>
+
+<!-- Hyphen character to be used in the exception list as shortcut for
+     <hyphen pre-break="-"/>. Defaults to '-'
+-->
+<!ELEMENT hyphen-char EMPTY>
+<!ATTLIST hyphen-char value CDATA #REQUIRED>
+
+<!-- Default minimum length in characters of hyphenated word fragments
+     before and after the line break. For some languages this is not
+     only for aesthetic purposes, wrong hyphens may be generated if this
+     is not accounted for.
+-->
+<!ELEMENT hyphen-min EMPTY>
+<!ATTLIST hyphen-min before CDATA #REQUIRED>
+<!ATTLIST hyphen-min after CDATA #REQUIRED>
+
+<!-- Character equivalent classes: space separated list of character groups, all
+     characters in a group are to be treated equivalent as far as
+     the hyphenation algorithm is concerned. The first character in a group
+     is the group's equivalent character. Patterns should only contain
+     first characters. It also defines word characters, i.e. a word that
+     contains characters not present in any of the classes is not hyphenated.
+-->
+<!ELEMENT classes (#PCDATA)>
+
+<!-- Hyphenation exceptions: space separated list of hyphenated words.
+     A hyphen is indicated by the hyphen tag, but you can use the
+     hyphen-char defined previously as shortcut. This is in cases
+     when the algorithm procedure finds wrong hyphens or you want
+     to provide your own hyphenation for some words.
+-->
+<!ELEMENT exceptions (#PCDATA|hyphen)* >
+
+<!-- The hyphenation patterns, space separated. A pattern is made of 'equivalent'
+     characters as described before, between any two word characters a digit
+     in the range 0 to 9 may be specified. The absence of a digit is equivalent
+     to zero. The '.' character is reserved to indicate beginning or ending
+     of words. -->
+<!ELEMENT patterns (#PCDATA)>
+
+<!-- A "full hyphen" equivalent to TeX's \discretionary
+     with pre-break, post-break and no-break attributes.
+     To be used in the exceptions list, the hyphen character is not
+     automatically added -->
+<!ELEMENT hyphen EMPTY>
+<!ATTLIST hyphen pre CDATA #IMPLIED>
+<!ATTLIST hyphen no CDATA #IMPLIED>
+<!ATTLIST hyphen post CDATA #IMPLIED>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
deleted file mode 100644
index 6442d11..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
+++ /dev/null
@@ -1,156 +0,0 @@
-\ufeffnamespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// This class implements a simple byte vector with access to the underlying
-    /// array.
-    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-    /// </summary>
-    public class ByteVector
-    {
-
-        /// <summary>
-        /// Capacity increment size
-        /// </summary>
-        private const int DEFAULT_BLOCK_SIZE = 2048;
-
-        private int blockSize;
-
-        /// <summary>
-        /// The encapsulated array
-        /// </summary>
-        private sbyte[] array;
-
-        /// <summary>
-        /// Points to next free item
-        /// </summary>
-        private int n;
-
-        public ByteVector() : this(DEFAULT_BLOCK_SIZE)
-        {
-        }
-
-        public ByteVector(int capacity)
-        {
-            if (capacity > 0)
-            {
-                blockSize = capacity;
-            }
-            else
-            {
-                blockSize = DEFAULT_BLOCK_SIZE;
-            }
-            array = new sbyte[blockSize];
-            n = 0;
-        }
-
-        public ByteVector(sbyte[] a)
-        {
-            blockSize = DEFAULT_BLOCK_SIZE;
-            array = a;
-            n = 0;
-        }
-
-        public ByteVector(sbyte[] a, int capacity)
-        {
-            if (capacity > 0)
-            {
-                blockSize = capacity;
-            }
-            else
-            {
-                blockSize = DEFAULT_BLOCK_SIZE;
-            }
-            array = a;
-            n = 0;
-        }
-
-        public virtual sbyte[] Array
-        {
-            get
-            {
-                return array;
-            }
-        }
-
-        /// <summary>
-        /// LUCENENET indexer for .NET
-        /// </summary>
-        /// <param name="index"></param>
-        /// <returns></returns>
-        public virtual sbyte this[int index]
-        {
-            get { return array[index]; }
-            set { array[index] = value; }
-        }
-
-        /// <summary>
-        /// return number of items in array
-        /// </summary>
-        public virtual int Length
-        {
-            get { return n; }
-        }
-
-        /// <summary>
-        /// returns current capacity of array
-        /// </summary>
-        public virtual int Capacity
-        {
-            get { return array.Length; }
-        }
-
-        //public virtual void Put(int index, sbyte val)
-        //{
-        //    array[index] = val;
-        //}
-
-        //public virtual sbyte Get(int index)
-        //{
-        //    return array[index];
-        //}
-
-        /// <summary>
-        /// This is to implement memory allocation in the array. Like malloc().
-        /// </summary>
-        public virtual int Alloc(int size)
-        {
-            int index = n;
-            int len = array.Length;
-            if (n + size >= len)
-            {
-                sbyte[] aux = new sbyte[len + blockSize];
-                System.Array.Copy(array, 0, aux, 0, len);
-                array = aux;
-            }
-            n += size;
-            return index;
-        }
-
-        public virtual void TrimToSize()
-        {
-            if (n < array.Length)
-            {
-                sbyte[] aux = new sbyte[n];
-                System.Array.Copy(array, 0, aux, 0, n);
-                array = aux;
-            }
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
deleted file mode 100644
index 26fcea5..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
+++ /dev/null
@@ -1,171 +0,0 @@
-\ufeffusing System;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// This class implements a simple char vector with access to the underlying
-    /// array.
-    /// 
-    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-    /// </summary>
-    public class CharVector : ICloneable
-    {
-
-        /// <summary>
-        /// Capacity increment size
-        /// </summary>
-        private const int DEFAULT_BLOCK_SIZE = 2048;
-
-        private int blockSize;
-
-        /// <summary>
-        /// The encapsulated array
-        /// </summary>
-        private char[] array;
-
-        /// <summary>
-        /// Points to next free item
-        /// </summary>
-        private int n;
-
-        public CharVector() : this(DEFAULT_BLOCK_SIZE)
-        {
-        }
-
-        public CharVector(int capacity)
-        {
-            if (capacity > 0)
-            {
-                blockSize = capacity;
-            }
-            else
-            {
-                blockSize = DEFAULT_BLOCK_SIZE;
-            }
-            array = new char[blockSize];
-            n = 0;
-        }
-
-        public CharVector(char[] a)
-        {
-            blockSize = DEFAULT_BLOCK_SIZE;
-            array = a;
-            n = a.Length;
-        }
-
-        public CharVector(char[] a, int capacity)
-        {
-            if (capacity > 0)
-            {
-                blockSize = capacity;
-            }
-            else
-            {
-                blockSize = DEFAULT_BLOCK_SIZE;
-            }
-            array = a;
-            n = a.Length;
-        }
-
-        /// <summary>
-        /// Reset Vector but don't resize or clear elements
-        /// </summary>
-        public virtual void Clear()
-        {
-            n = 0;
-        }
-
-        public virtual object Clone()
-        {
-            CharVector cv = new CharVector(array, blockSize);
-            cv.n = this.n;
-            return cv;
-        }
-
-        public virtual char[] Array
-        {
-            get
-            {
-                return array;
-            }
-        }
-
-        /// <summary>
-        /// LUCENENET indexer for .NET
-        /// </summary>
-        /// <param name="index"></param>
-        /// <returns></returns>
-        public virtual char this[int index]
-        {
-            get { return array[index]; }
-            set { array[index] = value; }
-        }
-
-        /// <summary>
-        /// return number of items in array
-        /// </summary>
-        public virtual int Length()
-        {
-            return n;
-        }
-
-        /// <summary>
-        /// returns current capacity of array
-        /// </summary>
-        public virtual int Capacity
-        {
-            get { return array.Length; }
-        }
-
-        //public virtual void Put(int index, char val)
-        //{
-        //    array[index] = val;
-        //}
-
-        //public virtual char get(int index)
-        //{
-        //    return array[index];
-        //}
-
-        public virtual int Alloc(int size)
-        {
-            int index = n;
-            int len = array.Length;
-            if (n + size >= len)
-            {
-                char[] aux = new char[len + blockSize];
-                System.Array.Copy(array, 0, aux, 0, len);
-                array = aux;
-            }
-            n += size;
-            return index;
-        }
-
-        public virtual void TrimToSize()
-        {
-            if (n < array.Length)
-            {
-                char[] aux = new char[n];
-                System.Array.Copy(array, 0, aux, 0, n);
-                array = aux;
-            }
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
deleted file mode 100644
index 91009b1..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
+++ /dev/null
@@ -1,72 +0,0 @@
-\ufeffusing System.Text;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// This class represents a hyphen. A 'full' hyphen is made of 3 parts: the
-    /// pre-break text, post-break text and no-break. If no line-break is generated
-    /// at this position, the no-break text is used, otherwise, pre-break and
-    /// post-break are used. Typically, pre-break is equal to the hyphen character
-    /// and the others are empty. However, this general scheme allows support for
-    /// cases in some languages where words change spelling if they're split across
-    /// lines, like german's 'backen' which hyphenates 'bak-ken'. BTW, this comes
-    /// from TeX.
-    /// 
-    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-    /// </summary>
-    public class Hyphen
-    {
-        public string preBreak;
-
-        public string noBreak;
-
-        public string postBreak;
-
-        internal Hyphen(string pre, string no, string post)
-        {
-            preBreak = pre;
-            noBreak = no;
-            postBreak = post;
-        }
-
-        internal Hyphen(string pre)
-        {
-            preBreak = pre;
-            noBreak = null;
-            postBreak = null;
-        }
-
-        public override string ToString()
-        {
-            if (noBreak == null && postBreak == null && preBreak != null && preBreak.Equals("-"))
-            {
-                return "-";
-            }
-            StringBuilder res = new StringBuilder("{");
-            res.Append(preBreak);
-            res.Append("}{");
-            res.Append(postBreak);
-            res.Append("}{");
-            res.Append(noBreak);
-            res.Append('}');
-            return res.ToString();
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
deleted file mode 100644
index fdbac29..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
+++ /dev/null
@@ -1,53 +0,0 @@
-\ufeffnamespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// This class represents a hyphenated word.
-    /// 
-    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
-    /// </summary>
-    public class Hyphenation
-    {
-
-        private readonly int[] hyphenPoints;
-
-        /// <summary>
-        /// rawWord as made of alternating strings and <seealso cref="Hyphen"/> instances
-        /// </summary>
-        internal Hyphenation(int[] points)
-        {
-            hyphenPoints = points;
-        }
-
-        /// <returns> the number of hyphenation points in the word </returns>
-        public virtual int Length
-        {
-            get { return hyphenPoints.Length; }
-        }
-
-        /// <returns> the hyphenation points </returns>
-        public virtual int[] HyphenationPoints
-        {
-            get
-            {
-                return hyphenPoints;
-            }
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
deleted file mode 100644
index 287f6f3..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
+++ /dev/null
@@ -1,581 +0,0 @@
-\ufeffusing Lucene.Net.Support;
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Text;
-using System.Xml;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-	/// This tree structure stores the hyphenation patterns in an efficient way for
-	/// fast lookup. It provides the method to hyphenate a word.
-	/// 
-	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-	/// </summary>
-	public class HyphenationTree : TernaryTree, IPatternConsumer
-    {
-
-        /// <summary>
-        /// value space: stores the interletter values
-        /// </summary>
-        protected internal ByteVector vspace;
-
-        /// <summary>
-        /// This map stores hyphenation exceptions
-        /// </summary>
-        protected internal IDictionary<string, IList<object>> stoplist;
-
-        /// <summary>
-        /// This map stores the character classes
-        /// </summary>
-        protected internal TernaryTree classmap;
-
-        /// <summary>
-        /// Temporary map to store interletter values on pattern loading.
-        /// </summary>
-        [NonSerialized]
-        private TernaryTree ivalues;
-
-        public HyphenationTree()
-        {
-            stoplist = new HashMap<string, IList<object>>(23); // usually a small table
-            classmap = new TernaryTree();
-            vspace = new ByteVector();
-            vspace.Alloc(1); // this reserves index 0, which we don't use
-        }
-
-        /// <summary>
-        /// Packs the values by storing them in 4 bits, two values into a byte Values
-        /// range is from 0 to 9. We use zero as terminator, so we'll add 1 to the
-        /// value.
-        /// </summary>
-        /// <param name="values"> a string of digits from '0' to '9' representing the
-        ///        interletter values. </param>
-        /// <returns> the index into the vspace array where the packed values are stored. </returns>
-        protected internal virtual int PackValues(string values)
-        {
-            int i, n = values.Length;
-            int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1;
-            int offset = vspace.Alloc(m);
-            sbyte[] va = vspace.Array;
-            for (i = 0; i < n; i++)
-            {
-                int j = i >> 1;
-                sbyte v = (sbyte)((values[i] - '0' + 1) & 0x0f);
-                if ((i & 1) == 1)
-                {
-                    va[j + offset] = (sbyte)(va[j + offset] | v);
-                }
-                else
-                {
-                    va[j + offset] = (sbyte)(v << 4); // big endian
-                }
-            }
-            va[m - 1 + offset] = 0; // terminator
-            return offset;
-        }
-
-        protected internal virtual string UnpackValues(int k)
-        {
-            StringBuilder buf = new StringBuilder();
-            sbyte v = vspace[k++];
-            while (v != 0)
-            {
-                char c = (char)(((int)((uint)v >> 4)) - 1 + '0');
-                buf.Append(c);
-                c = (char)(v & 0x0f);
-                if (c == 0)
-                {
-                    break;
-                }
-                c = (char)(c - 1 + '0');
-                buf.Append(c);
-                v = vspace[k++];
-            }
-            return buf.ToString();
-        }
-
-        /// <summary>
-        /// Read hyphenation patterns from an XML file.
-        /// </summary>
-        /// <param name="filename"> the filename </param>
-        /// <exception cref="IOException"> In case the parsing fails </exception>
-        public virtual void LoadPatterns(string filename)
-        {
-            LoadPatterns(filename, Encoding.UTF8);
-        }
-
-        /// <summary>
-        /// Read hyphenation patterns from an XML file.
-        /// </summary>
-        /// <param name="filename"> the filename </param>
-        /// <exception cref="IOException"> In case the parsing fails </exception>
-        public virtual void LoadPatterns(string filename, Encoding encoding)
-        {
-            var src = new FileStream(filename, FileMode.Open, FileAccess.Read);
-            LoadPatterns(src, encoding);
-        }
-
-        /// <summary>
-        /// Read hyphenation patterns from an XML file.
-        /// </summary>
-        /// <param name="f"> the filename </param>
-        /// <exception cref="IOException"> In case the parsing fails </exception>
-        public virtual void LoadPatterns(FileInfo f)
-        {
-            LoadPatterns(f, Encoding.UTF8);
-        }
-
-        /// <summary>
-        /// Read hyphenation patterns from an XML file.
-        /// </summary>
-        /// <param name="f"> the filename </param>
-        /// <exception cref="IOException"> In case the parsing fails </exception>
-        public virtual void LoadPatterns(FileInfo f, Encoding encoding)
-        {
-            var src = new FileStream(f.FullName, FileMode.Open, FileAccess.Read);
-            LoadPatterns(src, encoding);
-        }
-
-        /// <summary>
-        /// Read hyphenation patterns from an XML file.
-        /// </summary>
-        /// <param name="source"> the InputSource for the file </param>
-        /// <exception cref="IOException"> In case the parsing fails </exception>
-        public virtual void LoadPatterns(Stream source)
-        {
-            LoadPatterns(source, Encoding.UTF8);
-        }
-
-        /// <summary>
-        /// Read hyphenation patterns from an XML file.
-        /// </summary>
-        /// <param name="source"> the InputSource for the file </param>
-        /// <exception cref="IOException"> In case the parsing fails </exception>
-        public virtual void LoadPatterns(Stream source, Encoding encoding)
-        {
-            // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
-            using (var reader = XmlReader.Create(new StreamReader(source, encoding), new XmlReaderSettings
-            {
-                DtdProcessing = DtdProcessing.Parse,
-                XmlResolver = new PatternParser.DtdResolver()
-            }))
-            {
-                LoadPatterns(reader);
-            }
-        }
-
-        public virtual void LoadPatterns(XmlReader source)
-        {
-            PatternParser pp = new PatternParser(this);
-            ivalues = new TernaryTree();
-
-            pp.Parse(source);
-
-            // patterns/values should be now in the tree
-            // let's optimize a bit
-            TrimToSize();
-            vspace.TrimToSize();
-            classmap.TrimToSize();
-
-            // get rid of the auxiliary map
-            ivalues = null;
-        }
-
-        public virtual string FindPattern(string pat)
-        {
-            int k = base.Find(pat);
-            if (k >= 0)
-            {
-                return UnpackValues(k);
-            }
-            return "";
-        }
-
-        /// <summary>
-        /// String compare, returns 0 if equal or t is a substring of s
-        /// </summary>
-        protected internal virtual int HStrCmp(char[] s, int si, char[] t, int ti)
-        {
-            for (; s[si] == t[ti]; si++, ti++)
-            {
-                if (s[si] == 0)
-                {
-                    return 0;
-                }
-            }
-            if (t[ti] == 0)
-            {
-                return 0;
-            }
-            return s[si] - t[ti];
-        }
-
-        protected internal virtual sbyte[] GetValues(int k)
-        {
-            StringBuilder buf = new StringBuilder();
-            sbyte v = vspace[k++];
-            while (v != 0)
-            {
-                char c = (char)((((int)((uint)v >> 4))) - 1);
-                buf.Append(c);
-                c = (char)(v & 0x0f);
-                if (c == 0)
-                {
-                    break;
-                }
-                c = (char)(c - 1);
-                buf.Append(c);
-                v = vspace[k++];
-            }
-            sbyte[] res = new sbyte[buf.Length];
-            for (int i = 0; i < res.Length; i++)
-            {
-                res[i] = (sbyte)buf[i];
-            }
-            return res;
-        }
-
-        /// <summary>
-        /// <para>
-        /// Search for all possible partial matches of word starting at index and update
-        /// interletter values. In other words, it does something like:
-        /// </para>
-        /// <code>
-        /// for(i=0; i&lt;patterns.length; i++) {
-        /// if ( word.substring(index).startsWith(patterns[i]) )
-        /// update_interletter_values(patterns[i]);
-        /// }
-        /// </code>
-        /// <para>
-        /// But it is done in an efficient way since the patterns are stored in a
-        /// ternary tree. In fact, this is the whole purpose of having the tree: doing
-        /// this search without having to test every single pattern. The number of
-        /// patterns for languages such as English range from 4000 to 10000. Thus,
-        /// doing thousands of string comparisons for each word to hyphenate would be
-        /// really slow without the tree. The tradeoff is memory, but using a ternary
-        /// tree instead of a trie, almost halves the memory used by Lout or TeX.
-        /// It's also faster than using a hash table
-        /// </para>
-        /// </summary>
-        /// <param name="word"> null terminated word to match </param>
-        /// <param name="index"> start index from word </param>
-        /// <param name="il"> interletter values array to update </param>
-        protected internal virtual void SearchPatterns(char[] word, int index, sbyte[] il)
-        {
-            sbyte[] values;
-            int i = index; // read position in word; advances by one for every matched character
-            char p, q; // p: node currently being examined; q: probe used to look for a pattern end
-            char sp = word[i];
-            p = root;
-
-            while (p > 0 && p < sc.Length)
-            {
-                if (sc[p] == 0xFFFF) // 0xFFFF marks a compressed branch (see the inner loop's comment)
-                {
-                    if (HStrCmp(word, i, kv.Array, lo[p]) == 0) // rest of word vs. kv content starting at lo[p]
-                    {
-                        values = GetValues(eq[p]); // data pointer is in eq[]
-                        int j = index;
-                        for (int k = 0; k < values.Length; k++)
-                        {
-                            if (j < il.Length && values[k] > il[j]) // only ever raise a position's value
-                            {
-                                il[j] = values[k];
-                            }
-                            j++;
-                        }
-                    }
-                    return;
-                }
-                int d = sp - sc[p];
-                if (d == 0)
-                {
-                    if (sp == 0) // matched the null character: nothing further to follow
-                    {
-                        break;
-                    }
-                    sp = word[++i];
-                    p = eq[p];
-                    q = p;
-
-                    // look for a pattern ending at this position by searching for
-                    // the null char ( splitchar == 0 )
-                    while (q > 0 && q < sc.Length)
-                    {
-                        if (sc[q] == 0xFFFF) // stop at compressed branch
-                        {
-                            break;
-                        }
-                        if (sc[q] == 0)
-                        {
-                            values = GetValues(eq[q]);
-                            int j = index; // values are applied starting at the search start index
-                            for (int k = 0; k < values.Length; k++)
-                            {
-                                if (j < il.Length && values[k] > il[j]) // only ever raise a position's value
-                                {
-                                    il[j] = values[k];
-                                }
-                                j++;
-                            }
-                            break;
-                        }
-                        else
-                        {
-                            q = lo[q];
-
-                            // Note carried over from the Java original:
-                            // actually the code should be: q = sc[q] < 0 ? hi[q] : lo[q]; but
-                            // java chars are unsigned (C# chars are unsigned as well, so
-                            // sc[q] < 0 could never be true and lo[q] is always taken)
-                        }
-                    }
-                }
-                else
-                {
-                    p = d < 0 ? lo[p] : hi[p]; // smaller characters branch to lo, larger ones to hi
-                }
-            }
-        }
-
-        /// <summary>
-        /// Hyphenates a whole string. The string is copied into a character array
-        /// and handed to the char-array overload with an offset of zero and the
-        /// full length of the string.
-        /// </summary>
-        /// <param name="word"> the word to be hyphenated </param>
-        /// <param name="remainCharCount"> minimum number of characters allowed before
-        ///        the hyphenation point </param>
-        /// <param name="pushCharCount"> minimum number of characters allowed after
-        ///        the hyphenation point </param>
-        /// <returns> the <see cref="Hyphenation"/> produced by the char-array overload,
-        ///         or null if the word is not hyphenated </returns>
-        public virtual Hyphenation Hyphenate(string word, int remainCharCount, int pushCharCount)
-        {
-            char[] letters = word.ToCharArray();
-            int length = letters.Length;
-            return Hyphenate(letters, 0, length, remainCharCount, pushCharCount);
-        }
-
-        // Index bookkeeping used by the char-array Hyphenate overload that follows:
-        // w = "****nnllllllnnn*****", where n is a non-letter, l is a letter, all n
-        // may be absent, the first n is at offset, the first l is at offset +
-        // iIgnoreAtBeginning; word = ".llllll.'\0'***", where all l in w are copied
-        // into word. In the first part of the routine len = w.length, in the second
-        // part of the routine len = word.length. Three indices are used: index(w),
-        // the index in w, index(word), the index in word, letterindex(word), the
-        // index in the letter part of word. The following relations exist:
-        //   index(w) = offset + i - 1
-        //   index(word) = i - iIgnoreAtBeginning
-        //   letterindex(word) = index(word) - 1 (see first loop)
-        // It follows that: index(w) - index(word) = offset - 1 + iIgnoreAtBeginning
-        // and: index(w) = letterindex(word) + offset + iIgnoreAtBeginning
-
-        /// <summary>
-        /// Hyphenate the word stored in a char array and return its hyphenation points.
-        /// </summary>
-        /// <param name="w"> char array that contains the word </param>
-        /// <param name="offset"> Offset to first character in word </param>
-        /// <param name="len"> Length of word </param>
-        /// <param name="remainCharCount"> Minimum number of characters allowed before the
-        ///        hyphenation point. </param>
-        /// <param name="pushCharCount"> Minimum number of characters allowed after the
-        ///        hyphenation point. </param>
-        /// <returns> a <see cref="Hyphenation"/> object representing the hyphenated word, or null if no point was found,
-        ///         the word has fewer letters than remainCharCount + pushCharCount, or a letter follows an inner non-letter. </returns>
-        public virtual Hyphenation Hyphenate(char[] w, int offset, int len, int remainCharCount, int pushCharCount)
-        {
-            int i;
-            char[] word = new char[len + 3]; // start marker + up to len letters + end marker + null
-
-            // normalize word
-            char[] c = new char[2]; // one-character lookup key; c[1] is never written and stays 0
-            int iIgnoreAtBeginning = 0; // number of non-letters skipped before the first letter
-            int iLength = len; // ends up as the number of letters found
-            bool bEndOfLetters = false; // set once a non-letter follows a letter
-            for (i = 1; i <= len; i++)
-            {
-                c[0] = w[offset + i - 1];
-                int nc = classmap.Find(c, 0);
-                if (nc < 0) // found a non-letter character ...
-                {
-                    if (i == (1 + iIgnoreAtBeginning))
-                    {
-                        // ... before any letter character
-                        iIgnoreAtBeginning++;
-                    }
-                    else
-                    {
-                        // ... after a letter character
-                        bEndOfLetters = true;
-                    }
-                    iLength--;
-                }
-                else
-                {
-                    if (!bEndOfLetters)
-                    {
-                        word[i - iIgnoreAtBeginning] = (char)nc; // store the value returned by the classmap lookup
-                    }
-                    else
-                    {
-                        return null; // a letter after an inner non-letter: give up on this input
-                    }
-                }
-            }
-            len = iLength; // from here on len counts letters only
-            if (len < (remainCharCount + pushCharCount))
-            {
-                // word is too short to be hyphenated
-                return null;
-            }
-            int[] result = new int[len + 1];
-            int k = 0; // number of hyphenation points collected in result
-
-            // check exception list first
-            string sw = new string(word, 1, len); // the letters only, without the marker slot at word[0]
-            if (stoplist.ContainsKey(sw))
-            {
-                // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no =
-                // null)
-                IList<object> hw = stoplist[sw];
-                int j = 0;
-                for (i = 0; i < hw.Count; i++)
-                {
-                    object o = hw[i];
-                    // j = index(sw) = letterindex(word)?
-                    // result[k] = corresponding index(w)
-                    if (o is string) // non-string entries are skipped; only string lengths advance j
-                    {
-                        j += ((string)o).Length;
-                        if (j >= remainCharCount && j < (len - pushCharCount)) // NOTE(review): exclusive bound here, inclusive in the pattern branch - confirm intended
-                        {
-                            result[k++] = j + iIgnoreAtBeginning;
-                        }
-                    }
-                }
-            }
-            else
-            {
-                // use algorithm to get hyphenation points
-                word[0] = '.'; // word start marker
-                word[len + 1] = '.'; // word end marker
-                word[len + 2] = (char)0; // null terminated
-                sbyte[] il = new sbyte[len + 3]; // initialized to zero
-                for (i = 0; i < len + 1; i++)
-                {
-                    SearchPatterns(word, i, il); // merge the values of every pattern starting at position i
-                }
-
-                // hyphenation points are located where interletter value is odd
-                // i is letterindex(word),
-                // i + 1 is index(word),
-                // result[k] = corresponding index(w)
-                for (i = 0; i < len; i++)
-                {
-                    if (((il[i + 1] & 1) == 1) && i >= remainCharCount && i <= (len - pushCharCount))
-                    {
-                        result[k++] = i + iIgnoreAtBeginning;
-                    }
-                }
-            }
-
-            if (k > 0)
-            {
-                // trim result array
-                int[] res = new int[k + 2]; // k points plus one synthetic entry at each end
-                Array.Copy(result, 0, res, 1, k);
-                // We add the synthetical hyphenation points
-                // at the beginning and end of the word
-                res[0] = 0;
-                res[k + 1] = len;
-                return new Hyphenation(res);
-            }
-            else
-            {
-                return null; // no hyphenation point found
-            }
-        }
-
-        /// <summary>
-        /// Registers a character class. Every character of the group is inserted
-        /// into the class map with the group's first character as its value, so
-        /// the first character acts as the normalization character (for a group
-        /// such as "aA", both 'a' and 'A' map to 'a'). An empty group is ignored.
-        /// Used by the pattern parser as the callback for character classes.
-        /// </summary>
-        /// <param name="chargroup"> the characters that are to be treated as equivalent </param>
-        public virtual void AddClass(string chargroup)
-        {
-            if (chargroup.Length == 0)
-            {
-                return;
-            }
-
-            char normalized = chargroup[0];
-            // two-slot key: slot 0 carries the character, slot 1 stays the null char
-            char[] entry = new char[] { (char)0, (char)0 };
-            foreach (char member in chargroup)
-            {
-                entry[0] = member;
-                classmap.Insert(entry, 0, normalized);
-            }
-        }
-
-        /// <summary>
-        /// Stores a hyphenation exception in the stop list under the given word.
-        /// The entry is written with an indexer assignment, so presumably an entry
-        /// already stored for the same word is replaced (stoplist's type is declared elsewhere).
-        /// </summary>
-        /// <param name="word"> normalized word; used as the lookup key </param>
-        /// <param name="hyphenatedword"> a list of alternating strings and
-        ///        <see cref="Hyphen"/> objects. </param>
-        public virtual void AddException(string word, List<object> hyphenatedword)
-        {
-            stoplist[word] = hyphenatedword;
-        }
-
-        /// <summary>
-        /// Adds a hyphenation pattern to the tree. The interletter value string is
-        /// looked up in <c>ivalues</c> first; only when the lookup does not yield a
-        /// positive index are the values packed and remembered there. The pattern
-        /// is then inserted with that index as its data.
-        /// </summary>
-        /// <param name="pattern"> the hyphenation pattern </param>
-        /// <param name="ivalue"> interletter weight values indicating the desirability and
-        ///        priority of hyphenating at a given point within the pattern. It
-        ///        should contain only digit characters. (i.e. '0' to '9'). </param>
-        public virtual void AddPattern(string pattern, string ivalue)
-        {
-            int valueIndex = ivalues.Find(ivalue);
-            bool alreadyPacked = valueIndex > 0;
-            if (!alreadyPacked)
-            {
-                valueIndex = PackValues(ivalue);
-                ivalues.Insert(ivalue, (char)valueIndex);
-            }
-
-            Insert(pattern, (char)valueIndex);
-        }
-
-        // public override void printStats(PrintStream @out)
-        // {
-        //@out.println("Value space size = " + Convert.ToString(vspace.length()));
-        //base.printStats(@out);
-
-        // }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
deleted file mode 100644
index 069badd..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
+++ /dev/null
@@ -1,54 +0,0 @@
-\ufeffusing System.Collections.Generic;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// This interface is used to connect the XML pattern file parser to the
-    /// hyphenation tree.
-    /// 
-    /// This interface has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). It has been slightly modified.
-    /// </summary>
-    public interface IPatternConsumer
-	{
-
-	  /// <summary>
-	  /// Add a character class. A character class defines characters that are
-	  /// considered equivalent for the purpose of hyphenation (e.g. "aA"). It
-	  /// usually means to ignore case.
-	  /// </summary>
-	  /// <param name="chargroup"> character group </param>
-	  void AddClass(string chargroup);
-
-	  /// <summary>
-	  /// Add a hyphenation exception. An exception replaces the result obtained by the algorithm
-	  /// for cases for which this fails or the user wants to provide their own hyphenation.
-	  /// </summary>
-	  /// <param name="word"> the word the exception applies to </param>
-	  /// <param name="hyphenatedword"> a list of alternating strings and <see cref="Hyphen"/> instances </param>
-	  void AddException(string word, List<object> hyphenatedword);
-
-	  /// <summary>
-	  /// Add hyphenation patterns.
-	  /// </summary>
-	  /// <param name="pattern"> the pattern </param>
-	  /// <param name="values"> interletter values expressed as a string of digit characters. </param>
-	  void AddPattern(string pattern, string values);
-	}
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
deleted file mode 100644
index 8c00d19..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
+++ /dev/null
@@ -1,483 +0,0 @@
-\ufeffusing System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Text;
-using System.Xml;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// A XMLReader document handler to read and parse hyphenation patterns from a XML
-    /// file.
-    /// 
-    /// LUCENENET: This class has been refactored from its Java counterpart to use XmlReader rather
-    /// than a SAX parser.
-    /// </summary>
-    public class PatternParser
-    {
-        internal int currElement; // one of the ELEM_* constants below, or 0 (EndElement resets it to 0)
-
-        internal IPatternConsumer consumer; // receives the AddClass / AddException / AddPattern callbacks
-
-        internal StringBuilder token; // characters of the token currently being assembled
-
-        internal List<object> exception; // parts of the exception entry being built (strings and Hyphen objects)
-
-        internal char hyphenChar; // '-' by default; replaced by the one-character value of a hyphen-char element
-
-        internal string errMsg; // NOTE(review): not referenced by any member of this class shown here
-
-        internal const int ELEM_CLASSES = 1; // inside a "classes" element
-
-        internal const int ELEM_EXCEPTIONS = 2; // inside an "exceptions" element
-
-        internal const int ELEM_PATTERNS = 3; // inside a "patterns" element
-
-        internal const int ELEM_HYPHEN = 4; // inside a "hyphen" element
-        /// <summary> Creates a parser with an empty token buffer and '-' as hyphen character; no consumer is set. </summary>
-        public PatternParser()
-        {
-            token = new StringBuilder();
-            hyphenChar = '-'; // default
-        }
-        /// <summary> Creates a parser that reports everything it reads to <paramref name="consumer"/>. </summary>
-        public PatternParser(IPatternConsumer consumer) : this()
-        {
-            this.consumer = consumer;
-        }
-        /// <summary> Sets the consumer that receives the parsed data (write-only property). </summary>
-        public virtual IPatternConsumer Consumer
-        {
-            set
-            {
-                this.consumer = value;
-            }
-        }
-
-        /// <summary>
-        /// Parses the hyphenation pattern file found at the given location. The
-        /// XML reader is created with DTD processing enabled and a
-        /// <see cref="DtdResolver"/> as resolver, and is disposed when parsing ends.
-        /// </summary>
-        /// <param name="filename"> the filename </param>
-        /// <exception cref="IOException"> In case of an exception while parsing </exception>
-        public virtual void Parse(string filename)
-        {
-            // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
-            XmlReaderSettings settings = new XmlReaderSettings();
-            settings.DtdProcessing = DtdProcessing.Parse;
-            settings.XmlResolver = new DtdResolver();
-
-            using (XmlReader reader = XmlReader.Create(filename, settings))
-            {
-                Parse(reader);
-            }
-        }
-
-        /// <summary>
-        /// Parses a hyphenation pattern file, reading it as UTF-8 text.
-        /// </summary>
-        /// <param name="file"> the pattern file </param>
-        public virtual void Parse(FileInfo file)
-        {
-            Parse(file, Encoding.UTF8); // delegates to the overload that takes an explicit encoding
-        }
-
-        /// <summary>
-        /// Parses a hyphenation pattern file, decoding it with the given encoding.
-        /// The file is closed again when parsing ends, whether it succeeds or throws.
-        /// </summary>
-        /// <param name="file"> the pattern file </param>
-        /// <param name="encoding"> the character encoding used to read the file </param>
-        public virtual void Parse(FileInfo file, Encoding encoding)
-        {
-            // The StreamReader needs its own using block: XmlReaderSettings.CloseInput
-            // is false by default, so disposing the XmlReader alone would leave the
-            // underlying file handle open until finalization.
-            using (var text = new StreamReader(file.FullName, encoding))
-            using (var src = XmlReader.Create(text, new XmlReaderSettings
-            {
-                DtdProcessing = DtdProcessing.Parse,
-                XmlResolver = new DtdResolver()
-            }))
-            {
-                Parse(src);
-            }
-        }
-
-        /// <summary>
-        /// Parses hyphenation patterns from a stream. The XML reader is created
-        /// with DTD processing enabled and a <see cref="DtdResolver"/> as resolver,
-        /// and is disposed when parsing ends.
-        /// </summary>
-        /// <param name="xmlStream"> the stream that supplies the pattern XML </param>
-        public virtual void Parse(Stream xmlStream)
-        {
-            XmlReaderSettings settings = new XmlReaderSettings();
-            settings.DtdProcessing = DtdProcessing.Parse;
-            settings.XmlResolver = new DtdResolver();
-
-            using (XmlReader reader = XmlReader.Create(xmlStream, settings))
-            {
-                Parse(reader);
-            }
-        }
-
-        /// <summary>
-        /// Parses hyphenation patterns from an XML reader, passing each node that is read to ParseNode.
-        /// </summary>
-        /// <param name="source"> the reader to consume; it is not disposed by this method </param>
-        /// <exception cref="IOException"> In case of an exception while parsing </exception>
-        public virtual void Parse(XmlReader source)
-        {
-            source.MoveToContent(); // skip ahead to the first content node
-            while (source.Read()) // Read() advances first, so the node found by MoveToContent is never passed on
-            {
-                ParseNode(source);
-            }
-        }
-
-        /// <summary>
-        /// Dispatches the node the reader is positioned on to the SAX-style
-        /// handlers: element starts go to StartElement (followed by EndElement
-        /// for empty elements), text goes to Characters and element ends go to
-        /// EndElement. All other node types are ignored.
-        /// </summary>
-        private void ParseNode(XmlReader node)
-        {
-            XmlNodeType kind = node.NodeType;
-            if (kind == XmlNodeType.Element)
-            {
-                // Everything needed from the element itself is read before
-                // GetAttributes walks over the attribute nodes.
-                string elementUri = node.NamespaceURI;
-                string elementName = node.Name;
-                bool selfClosing = node.IsEmptyElement;
-                IDictionary<string, string> attrs = GetAttributes(node);
-
-                // the "raw" argument is not used by the handlers; pass an empty string
-                this.StartElement(elementUri, elementName, string.Empty, attrs);
-                if (selfClosing)
-                {
-                    this.EndElement(elementUri, elementName, string.Empty);
-                }
-            }
-            else if (kind == XmlNodeType.Text)
-            {
-                string text = node.Value;
-                this.Characters(text.ToCharArray(), 0, text.Length);
-            }
-            else if (kind == XmlNodeType.EndElement)
-            {
-                this.EndElement(node.NamespaceURI, node.Name, string.Empty);
-            }
-        }
-
-        /// <summary>
-        /// Collects the attributes of the element the reader is positioned on
-        /// into a name-to-value dictionary. The reader is moved back onto the
-        /// owning element afterwards, so the caller still sees the element node.
-        /// </summary>
-        /// <param name="node"> a reader positioned on an element </param>
-        /// <returns> the element's attributes; empty when it has none </returns>
-        private IDictionary<string, string> GetAttributes(XmlReader node)
-        {
-            var result = new Dictionary<string, string>();
-            if (node.HasAttributes)
-            {
-                for (int i = 0; i < node.AttributeCount; i++)
-                {
-                    node.MoveToAttribute(i);
-                    result.Add(node.Name, node.Value);
-                }
-
-                // MoveToAttribute leaves the reader on an attribute node; return to
-                // the element so properties such as IsEmptyElement stay meaningful.
-                node.MoveToElement();
-            }
-
-            return result;
-        }
-
-        /// <summary>
-        /// Extracts the next whitespace-delimited token from <paramref name="chars"/>,
-        /// consuming the characters it has looked at. Text that is not yet followed
-        /// by whitespace is kept in the <c>token</c> buffer so that a token split
-        /// across several calls is reassembled.
-        /// </summary>
-        /// <param name="chars"> the pending characters; consumed from the front </param>
-        /// <returns> a completed token, or null when no token is complete yet </returns>
-        protected internal virtual string ReadToken(StringBuilder chars)
-        {
-            // Step 1: strip leading whitespace. Whitespace terminates whatever was
-            // buffered by an earlier call, so that text is returned right away.
-            int leading = 0;
-            while (leading < chars.Length && char.IsWhiteSpace(chars[leading]))
-            {
-                leading++;
-            }
-            if (leading > 0)
-            {
-                chars.Remove(0, leading);
-                if (token.Length > 0)
-                {
-                    string buffered = token.ToString();
-                    token.Length = 0;
-                    return buffered;
-                }
-            }
-
-            // Step 2: move everything up to the next whitespace into the buffer.
-            int end = 0;
-            while (end < chars.Length && !char.IsWhiteSpace(chars[end]))
-            {
-                end++;
-            }
-            bool terminated = end < chars.Length;
-            token.Append(chars.ToString(0, end));
-            chars.Remove(0, end);
-
-            if (!terminated)
-            {
-                // input exhausted (chars is empty now); the text stays buffered
-                return null;
-            }
-
-            string completed = token.ToString();
-            token.Length = 0;
-            return completed;
-        }
-
-        /// <summary>
-        /// Returns <paramref name="word"/> with every digit character removed,
-        /// i.e. the letters of a pattern without its interletter values.
-        /// </summary>
-        protected internal static string GetPattern(string word)
-        {
-            StringBuilder letters = new StringBuilder();
-            foreach (char ch in word)
-            {
-                if (char.IsDigit(ch))
-                {
-                    continue;
-                }
-                letters.Append(ch);
-            }
-            return letters.ToString();
-        }
-
-        /// <summary>
-        /// Splits the string items of an exception entry at the hyphen character.
-        /// Each occurrence contributes the text collected so far (possibly empty)
-        /// followed by a <see cref="Hyphen"/> built from the hyphen character;
-        /// text left over at the end of a string is added when it is non-empty.
-        /// Items that are not strings are copied to the result unchanged.
-        /// </summary>
-        /// <param name="ex"> the raw exception entry </param>
-        /// <returns> a new list holding the normalized entry </returns>
-        protected internal virtual List<object> NormalizeException<T1>(List<T1> ex)
-        {
-            List<object> normalized = new List<object>();
-            foreach (T1 entry in ex)
-            {
-                object item = entry;
-                string text = item as string;
-                if (text == null)
-                {
-                    normalized.Add(item);
-                    continue;
-                }
-
-                StringBuilder segment = new StringBuilder();
-                foreach (char c in text)
-                {
-                    if (c == hyphenChar)
-                    {
-                        normalized.Add(segment.ToString());
-                        segment.Length = 0;
-                        // we use here hyphenChar which is not necessarily
-                        // the one to be printed
-                        normalized.Add(new Hyphen(hyphenChar.ToString(), null, null));
-                    }
-                    else
-                    {
-                        segment.Append(c);
-                    }
-                }
-                if (segment.Length > 0)
-                {
-                    normalized.Add(segment.ToString());
-                }
-            }
-            return normalized;
-        }
-
-        /// <summary>
-        /// Builds the plain word an exception entry stands for: string items are
-        /// concatenated as they are, and every other item is treated as a
-        /// <see cref="Hyphen"/> that contributes its <c>noBreak</c> text when set.
-        /// </summary>
-        /// <param name="ex"> the exception entry </param>
-        /// <returns> the concatenated word </returns>
-        protected internal virtual string GetExceptionWord<T1>(List<T1> ex)
-        {
-            StringBuilder word = new StringBuilder();
-            foreach (T1 entry in ex)
-            {
-                object item = entry;
-                string text = item as string;
-                if (text != null)
-                {
-                    word.Append(text);
-                    continue;
-                }
-
-                Hyphen hyphen = (Hyphen)item;
-                if (hyphen.noBreak != null)
-                {
-                    word.Append(hyphen.noBreak);
-                }
-            }
-            return word.ToString();
-        }
-
-        /// <summary>
-        /// Derives the interletter value string of a pattern. The pattern is
-        /// scanned with one dummy letter appended: a digit is copied to the
-        /// result and the character after it is skipped, any other character
-        /// contributes '0'.
-        /// </summary>
-        /// <param name="pat"> the pattern, letters mixed with digit values </param>
-        /// <returns> the interletter values as a string of digit characters </returns>
-        protected internal static string GetInterletterValues(string pat)
-        {
-            StringBuilder values = new StringBuilder();
-            string padded = pat + "a"; // add dummy letter to serve as sentinel
-            int pos = 0;
-            while (pos < padded.Length)
-            {
-                char c = padded[pos];
-                if (char.IsDigit(c))
-                {
-                    values.Append(c);
-                    pos += 2; // the digit and the character that follows it
-                }
-                else
-                {
-                    values.Append('0');
-                    pos += 1;
-                }
-            }
-            return values.ToString();
-        }
-
-        /// <summary>
-        /// LUCENENET specific resolver that serves "hyphenation.dtd" from an
-        /// embedded resource instead of the location named in the document.
-        /// Every other entity is resolved by the base class.
-        /// </summary>
-        internal class DtdResolver : XmlUrlResolver
-        {
-            public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
-            {
-                const string DtdFileName = "hyphenation.dtd";
-
-                string lastSegment = absoluteUri.Segments.LastOrDefault();
-                bool requestsDtd = DtdFileName.Equals(lastSegment, StringComparison.OrdinalIgnoreCase);
-                if (!requestsDtd)
-                {
-                    return base.GetEntity(absoluteUri, role, ofObjectToReturn);
-                }
-
-                // the resource name is the namespace of this type plus the file name
-                Type self = GetType();
-                string resourceName = string.Concat(self.Namespace, ".", DtdFileName);
-                return self.Assembly.GetManifestResourceStream(resourceName);
-            }
-        }
-
-        //
-        // ContentHandler methods
-        //
-
-        /// <summary>
-        /// Handles the start of an element (counterpart of the SAX
-        /// <c>ContentHandler.startElement</c> callback of the Java original).
-        /// Records which section is being read, picks up the hyphen character
-        /// from a "hyphen-char" element and turns a "hyphen" element into a
-        /// <see cref="Hyphen"/> appended to the current exception entry.
-        /// The token buffer is cleared in every case.
-        /// </summary>
-        /// <param name="uri"> namespace URI of the element (not used) </param>
-        /// <param name="local"> name of the element </param>
-        /// <param name="raw"> raw element text (not used) </param>
-        /// <param name="attrs"> attributes of the element, by name </param>
-        public void StartElement(string uri, string local, string raw, IDictionary<string, string> attrs)
-        {
-            if (local.Equals("hyphen-char"))
-            {
-                string h;
-                if (attrs.TryGetValue("value", out h) && h != null && h.Length == 1)
-                {
-                    hyphenChar = h[0];
-                }
-            }
-            else if (local.Equals("classes"))
-            {
-                currElement = ELEM_CLASSES;
-            }
-            else if (local.Equals("patterns"))
-            {
-                currElement = ELEM_PATTERNS;
-            }
-            else if (local.Equals("exceptions"))
-            {
-                currElement = ELEM_EXCEPTIONS;
-                exception = new List<object>();
-            }
-            else if (local.Equals("hyphen"))
-            {
-                if (token.Length > 0)
-                {
-                    exception.Add(token.ToString());
-                }
-
-                // An attribute that is absent yields null, as the SAX getValue call
-                // of the Java original did; indexing the dictionary directly would
-                // throw KeyNotFoundException for an element that omits one of them.
-                string pre, no, post;
-                attrs.TryGetValue("pre", out pre);
-                attrs.TryGetValue("no", out no);
-                attrs.TryGetValue("post", out post);
-                exception.Add(new Hyphen(pre, no, post));
-                currElement = ELEM_HYPHEN;
-            }
-            token.Length = 0;
-        }
-
-        /// <summary>
-        /// Handles the end of an element (counterpart of the SAX
-        /// <c>ContentHandler.endElement</c> callback of the Java original).
-        /// Text still held in the token buffer is reported to the consumer
-        /// according to the current section; afterwards the section state is
-        /// reset: back to "exceptions" after a "hyphen" element, to none otherwise.
-        /// </summary>
-        /// <param name="uri"> namespace URI of the element (not used) </param>
-        /// <param name="local"> name of the element (not used) </param>
-        /// <param name="raw"> raw element text (not used) </param>
-        public void EndElement(string uri, string local, string raw)
-        {
-            if (token.Length > 0)
-            {
-                string word = token.ToString();
-                if (currElement == ELEM_CLASSES)
-                {
-                    consumer.AddClass(word);
-                }
-                else if (currElement == ELEM_EXCEPTIONS)
-                {
-                    exception.Add(word);
-                    exception = NormalizeException(exception);
-                    consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
-                }
-                else if (currElement == ELEM_PATTERNS)
-                {
-                    consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
-                }
-                // ELEM_HYPHEN: nothing to report
-
-                // text read inside a hyphen element stays buffered
-                if (currElement != ELEM_HYPHEN)
-                {
-                    token.Length = 0;
-                }
-            }
-
-            currElement = (currElement == ELEM_HYPHEN) ? ELEM_EXCEPTIONS : 0;
-        }
-
-        /// <summary>
-        /// Handles character data (counterpart of the SAX
-        /// <c>ContentHandler.characters</c> callback of the Java original).
-        /// The text is cut into whitespace-delimited tokens and every completed
-        /// token is reported to the consumer according to the current section.
-        /// </summary>
-        /// <param name="ch"> buffer holding the characters </param>
-        /// <param name="start"> index of the first character to use </param>
-        /// <param name="length"> number of characters to use </param>
-        public void Characters(char[] ch, int start, int length)
-        {
-            StringBuilder pending = new StringBuilder(length);
-            pending.Append(ch, start, length);
-
-            for (string word = ReadToken(pending); word != null; word = ReadToken(pending))
-            {
-                if (currElement == ELEM_CLASSES)
-                {
-                    consumer.AddClass(word);
-                }
-                else if (currElement == ELEM_EXCEPTIONS)
-                {
-                    exception.Add(word);
-                    exception = NormalizeException(exception);
-                    consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
-                    exception.Clear();
-                }
-                else if (currElement == ELEM_PATTERNS)
-                {
-                    consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
-                }
-            }
-        }
-    }
-}
\ No newline at end of file

[4/4] lucenenet git commit: Merge remote-tracking branch 'devmk/master'

Posted by sy...@apache.org.

Merge remote-tracking branch 'devmk/master'


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/7214c8a3
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/7214c8a3
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/7214c8a3

Branch: refs/heads/master
Commit: 7214c8a3c991889f0cb024b0b8045304b1b3d6df
Parents: e75dbf6 7ecb752
Author: Itamar Syn-Hershko <it...@code972.com>
Authored: Wed Dec 14 22:00:34 2016 +0200
Committer: Itamar Syn-Hershko <it...@code972.com>
Committed: Wed Dec 14 22:00:34 2016 +0200

----------------------------------------------------------------------
 .../Analysis/Compound/Hyphenation/ByteVector.cs | 156 ++++
 .../Analysis/Compound/Hyphenation/CharVector.cs | 171 ++++
 .../Analysis/Compound/Hyphenation/Hyphen.cs     |  72 ++
 .../Compound/Hyphenation/Hyphenation.cs         |  53 ++
 .../Compound/Hyphenation/HyphenationTree.cs     | 581 +++++++++++++
 .../Compound/Hyphenation/PatternConsumer.cs     |  54 ++
 .../Compound/Hyphenation/PatternParser.cs       | 483 +++++++++++
 .../Compound/Hyphenation/TernaryTree.cs         | 816 +++++++++++++++++++
 .../Compound/Hyphenation/hyphenation.dtd        |  68 ++
 .../Analysis/Compound/hyphenation/ByteVector.cs | 156 ----
 .../Analysis/Compound/hyphenation/CharVector.cs | 171 ----
 .../Analysis/Compound/hyphenation/Hyphen.cs     |  72 --
 .../Compound/hyphenation/Hyphenation.cs         |  53 --
 .../Compound/hyphenation/HyphenationTree.cs     | 581 -------------
 .../Compound/hyphenation/PatternConsumer.cs     |  54 --
 .../Compound/hyphenation/PatternParser.cs       | 483 -----------
 .../Compound/hyphenation/TernaryTree.cs         | 816 -------------------
 .../Compound/hyphenation/hyphenation.dtd        |  68 --
 18 files changed, 2454 insertions(+), 2454 deletions(-)
----------------------------------------------------------------------

[3/4] lucenenet git commit: Renamed hyphenation to Hyphenation to fix build and run on case sensitive file systems

Posted by sy...@apache.org.

Renamed hyphenation to Hyphenation to fix build and run on case sensitive file systems


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/7ecb7529
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/7ecb7529
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/7ecb7529

Branch: refs/heads/master
Commit: 7ecb7529066c1d95dce83b175dac3322fc4068ab
Parents: 96d38ef
Author: Yaroslav <sl...@gmail.com>
Authored: Mon Nov 28 21:23:25 2016 +0200
Committer: Yaroslav <sl...@gmail.com>
Committed: Mon Nov 28 21:23:25 2016 +0200

----------------------------------------------------------------------
 .../Analysis/Compound/Hyphenation/ByteVector.cs | 156 ++++
 .../Analysis/Compound/Hyphenation/CharVector.cs | 171 ++++
 .../Analysis/Compound/Hyphenation/Hyphen.cs     |  72 ++
 .../Compound/Hyphenation/Hyphenation.cs         |  53 ++
 .../Compound/Hyphenation/HyphenationTree.cs     | 581 +++++++++++++
 .../Compound/Hyphenation/PatternConsumer.cs     |  54 ++
 .../Compound/Hyphenation/PatternParser.cs       | 483 +++++++++++
 .../Compound/Hyphenation/TernaryTree.cs         | 816 +++++++++++++++++++
 .../Compound/Hyphenation/hyphenation.dtd        |  68 ++
 .../Analysis/Compound/hyphenation/ByteVector.cs | 156 ----
 .../Analysis/Compound/hyphenation/CharVector.cs | 171 ----
 .../Analysis/Compound/hyphenation/Hyphen.cs     |  72 --
 .../Compound/hyphenation/Hyphenation.cs         |  53 --
 .../Compound/hyphenation/HyphenationTree.cs     | 581 -------------
 .../Compound/hyphenation/PatternConsumer.cs     |  54 --
 .../Compound/hyphenation/PatternParser.cs       | 483 -----------
 .../Compound/hyphenation/TernaryTree.cs         | 816 -------------------
 .../Compound/hyphenation/hyphenation.dtd        |  68 --
 18 files changed, 2454 insertions(+), 2454 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs
new file mode 100644
index 0000000..6442d11
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs
@@ -0,0 +1,156 @@
+\ufeffnamespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A simple growable vector of signed bytes that gives direct access to its
+    /// underlying array.
+    /// <para/>
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/) and slightly modified.
+    /// </summary>
+    public class ByteVector
+    {
+        /// <summary>
+        /// Default capacity increment size
+        /// </summary>
+        private const int DEFAULT_BLOCK_SIZE = 2048;
+
+        /// <summary>
+        /// Number of elements the array grows by (always positive)
+        /// </summary>
+        private readonly int blockSize;
+
+        /// <summary>
+        /// The encapsulated array
+        /// </summary>
+        private sbyte[] array;
+
+        /// <summary>
+        /// Points to next free item
+        /// </summary>
+        private int n;
+
+        /// <summary>
+        /// Creates an empty vector with the default block size.
+        /// </summary>
+        public ByteVector() : this(DEFAULT_BLOCK_SIZE)
+        {
+        }
+
+        /// <summary>
+        /// Creates an empty vector whose initial capacity and growth increment are
+        /// both <paramref name="capacity"/>.
+        /// </summary>
+        /// <param name="capacity"> initial capacity and block size; a value that is
+        ///        not positive selects the default block size </param>
+        public ByteVector(int capacity)
+        {
+            blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
+            array = new sbyte[blockSize];
+            n = 0;
+        }
+
+        /// <summary>
+        /// Wraps an existing array (it is not copied) using the default block size.
+        /// The vector starts with a <see cref="Length"/> of 0.
+        /// </summary>
+        /// <param name="a"> the array to use as backing store </param>
+        public ByteVector(sbyte[] a) : this(a, DEFAULT_BLOCK_SIZE)
+        {
+        }
+
+        /// <summary>
+        /// Wraps an existing array (it is not copied). The vector starts with a
+        /// <see cref="Length"/> of 0.
+        /// </summary>
+        /// <param name="a"> the array to use as backing store </param>
+        /// <param name="capacity"> growth increment; a value that is not positive
+        ///        selects the default block size </param>
+        public ByteVector(sbyte[] a, int capacity)
+        {
+            blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
+            array = a;
+            n = 0;
+        }
+
+        /// <summary>
+        /// Gets the underlying array. The reference changes whenever the vector
+        /// grows or is trimmed, so do not cache it across those calls.
+        /// </summary>
+        public virtual sbyte[] Array
+        {
+            get
+            {
+                return array;
+            }
+        }
+
+        /// <summary>
+        /// LUCENENET indexer for .NET (used in place of separate Put/Get methods).
+        /// No range check against <see cref="Length"/> is made; only the bounds of
+        /// the underlying array apply.
+        /// </summary>
+        /// <param name="index"> position in the underlying array </param>
+        /// <returns> the value stored at <paramref name="index"/> </returns>
+        public virtual sbyte this[int index]
+        {
+            get { return array[index]; }
+            set { array[index] = value; }
+        }
+
+        /// <summary>
+        /// return number of items in array
+        /// </summary>
+        public virtual int Length
+        {
+            get { return n; }
+        }
+
+        /// <summary>
+        /// returns current capacity of array
+        /// </summary>
+        public virtual int Capacity
+        {
+            get { return array.Length; }
+        }
+
+        /// <summary>
+        /// This is to implement memory allocation in the array. Like malloc().
+        /// Reserves <paramref name="size"/> consecutive items and returns the index
+        /// of the first one.
+        /// </summary>
+        /// <param name="size"> number of items to reserve </param>
+        /// <returns> index of the first reserved item </returns>
+        public virtual int Alloc(int size)
+        {
+            int index = n;
+            int len = array.Length;
+            if (n + size >= len)
+            {
+                // Grow by as many whole blocks as are needed so that the request
+                // really fits. Growing by a single block (the previous behavior) is
+                // not enough when size exceeds blockSize and left the returned range
+                // partly outside the array. When one block suffices the resulting
+                // capacity is exactly what it used to be.
+                int shortfall = n + size + 1 - len;
+                int blocks = (shortfall + blockSize - 1) / blockSize;
+                sbyte[] aux = new sbyte[len + blocks * blockSize];
+                System.Array.Copy(array, 0, aux, 0, len);
+                array = aux;
+            }
+            n += size;
+            return index;
+        }
+
+        /// <summary>
+        /// Shrinks the underlying array so that its capacity equals <see cref="Length"/>.
+        /// </summary>
+        public virtual void TrimToSize()
+        {
+            if (n < array.Length)
+            {
+                sbyte[] aux = new sbyte[n];
+                System.Array.Copy(array, 0, aux, 0, n);
+                array = aux;
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs
new file mode 100644
index 0000000..26fcea5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs
@@ -0,0 +1,171 @@
+\ufeffusing System;
+
+namespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A simple growable vector of chars that gives direct access to its
+    /// underlying array.
+    /// <para/>
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/) and slightly modified.
+    /// </summary>
+    public class CharVector : ICloneable
+    {
+        /// <summary>
+        /// Default capacity increment size
+        /// </summary>
+        private const int DEFAULT_BLOCK_SIZE = 2048;
+
+        /// <summary>
+        /// Number of elements the array grows by (always positive)
+        /// </summary>
+        private readonly int blockSize;
+
+        /// <summary>
+        /// The encapsulated array
+        /// </summary>
+        private char[] array;
+
+        /// <summary>
+        /// Points to next free item
+        /// </summary>
+        private int n;
+
+        /// <summary>
+        /// Creates an empty vector with the default block size.
+        /// </summary>
+        public CharVector() : this(DEFAULT_BLOCK_SIZE)
+        {
+        }
+
+        /// <summary>
+        /// Creates an empty vector whose initial capacity and growth increment are
+        /// both <paramref name="capacity"/>.
+        /// </summary>
+        /// <param name="capacity"> initial capacity and block size; a value that is
+        ///        not positive selects the default block size </param>
+        public CharVector(int capacity)
+        {
+            blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
+            array = new char[blockSize];
+            n = 0;
+        }
+
+        /// <summary>
+        /// Wraps an existing array (it is not copied) using the default block size.
+        /// All elements of <paramref name="a"/> count as items of the vector.
+        /// </summary>
+        /// <param name="a"> the array to use as backing store </param>
+        public CharVector(char[] a) : this(a, DEFAULT_BLOCK_SIZE)
+        {
+        }
+
+        /// <summary>
+        /// Wraps an existing array (it is not copied). All elements of
+        /// <paramref name="a"/> count as items of the vector.
+        /// </summary>
+        /// <param name="a"> the array to use as backing store </param>
+        /// <param name="capacity"> growth increment; a value that is not positive
+        ///        selects the default block size </param>
+        public CharVector(char[] a, int capacity)
+        {
+            blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
+            array = a;
+            n = a.Length;
+        }
+
+        /// <summary>
+        /// Reset Vector but don't resize or clear elements
+        /// </summary>
+        public virtual void Clear()
+        {
+            n = 0;
+        }
+
+        /// <summary>
+        /// Creates an independent copy of this vector with the same contents,
+        /// length and block size.
+        /// </summary>
+        /// <returns> the new <see cref="CharVector"/> </returns>
+        public virtual object Clone()
+        {
+            // Copy the backing array: handing the same array to the clone would
+            // make every write through one vector visible through the other.
+            CharVector cv = new CharVector((char[])array.Clone(), blockSize);
+            cv.n = this.n;
+            return cv;
+        }
+
+        /// <summary>
+        /// Gets the underlying array. The reference changes whenever the vector
+        /// grows or is trimmed, so do not cache it across those calls.
+        /// </summary>
+        public virtual char[] Array
+        {
+            get
+            {
+                return array;
+            }
+        }
+
+        /// <summary>
+        /// LUCENENET indexer for .NET (used in place of separate Put/Get methods).
+        /// No range check against <see cref="Length()"/> is made; only the bounds
+        /// of the underlying array apply.
+        /// </summary>
+        /// <param name="index"> position in the underlying array </param>
+        /// <returns> the char stored at <paramref name="index"/> </returns>
+        public virtual char this[int index]
+        {
+            get { return array[index]; }
+            set { array[index] = value; }
+        }
+
+        /// <summary>
+        /// return number of items in array
+        /// </summary>
+        public virtual int Length()
+        {
+            return n;
+        }
+
+        /// <summary>
+        /// returns current capacity of array
+        /// </summary>
+        public virtual int Capacity
+        {
+            get { return array.Length; }
+        }
+
+        /// <summary>
+        /// Reserves <paramref name="size"/> consecutive items and returns the index
+        /// of the first one, growing the underlying array when necessary.
+        /// </summary>
+        /// <param name="size"> number of items to reserve </param>
+        /// <returns> index of the first reserved item </returns>
+        public virtual int Alloc(int size)
+        {
+            int index = n;
+            int len = array.Length;
+            if (n + size >= len)
+            {
+                // Grow by as many whole blocks as are needed so that the request
+                // really fits. Growing by a single block (the previous behavior) is
+                // not enough when size exceeds blockSize and left the returned range
+                // partly outside the array. When one block suffices the resulting
+                // capacity is exactly what it used to be.
+                int shortfall = n + size + 1 - len;
+                int blocks = (shortfall + blockSize - 1) / blockSize;
+                char[] aux = new char[len + blocks * blockSize];
+                System.Array.Copy(array, 0, aux, 0, len);
+                array = aux;
+            }
+            n += size;
+            return index;
+        }
+
+        /// <summary>
+        /// Shrinks the underlying array so that its capacity equals <see cref="Length()"/>.
+        /// </summary>
+        public virtual void TrimToSize()
+        {
+            if (n < array.Length)
+            {
+                char[] aux = new char[n];
+                System.Array.Copy(array, 0, aux, 0, n);
+                array = aux;
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs
new file mode 100644
index 0000000..91009b1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs
@@ -0,0 +1,72 @@
+\ufeffusing System.Text;
+
+namespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Represents a hyphen. A 'full' hyphen consists of three texts: pre-break,
+    /// post-break and no-break. When no line break is generated at the position
+    /// the no-break text is used; otherwise the pre-break and post-break texts are
+    /// used. Typically the pre-break text is the hyphen character and the other
+    /// two are empty. The general scheme exists for languages in which a word
+    /// changes spelling when it is split across lines, like German 'backen' which
+    /// hyphenates as 'bak-ken'. BTW, this comes from TeX.
+    /// <para/>
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/) and slightly modified.
+    /// </summary>
+    public class Hyphen
+    {
+        /// <summary> Text emitted before the break. </summary>
+        public string preBreak;
+
+        /// <summary> Text emitted when no break is generated. </summary>
+        public string noBreak;
+
+        /// <summary> Text emitted after the break. </summary>
+        public string postBreak;
+
+        /// <summary>
+        /// Creates a hyphen from its three parts.
+        /// </summary>
+        internal Hyphen(string pre, string no, string post)
+        {
+            this.preBreak = pre;
+            this.noBreak = no;
+            this.postBreak = post;
+        }
+
+        /// <summary>
+        /// Creates a hyphen that only has a pre-break text; the other two parts are null.
+        /// </summary>
+        internal Hyphen(string pre) : this(pre, null, null)
+        {
+        }
+
+        /// <summary>
+        /// Returns "-" for the plain hyphen (pre-break "-" and nothing else),
+        /// otherwise the three parts in the form {pre}{post}{no}.
+        /// </summary>
+        public override string ToString()
+        {
+            bool onlyPreBreak = noBreak == null && postBreak == null;
+            if (onlyPreBreak && preBreak != null && preBreak.Equals("-"))
+            {
+                return "-";
+            }
+
+            // null parts contribute nothing to the result
+            return string.Concat("{", preBreak, "}{", postBreak, "}{") + noBreak + "}";
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs
new file mode 100644
index 0000000..fdbac29
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs
@@ -0,0 +1,53 @@
+\ufeffnamespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Represents a hyphenated word as the list of its hyphenation points.
+    /// <para/>
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/) and slightly modified.
+    /// </summary>
+    public class Hyphenation
+    {
+        // The array is stored and handed out as-is (no defensive copy).
+        private readonly int[] points;
+
+        /// <summary>
+        /// Creates a hyphenation result that wraps the given hyphenation points.
+        /// </summary>
+        /// <param name="points"> the hyphenation points of the word </param>
+        internal Hyphenation(int[] points)
+        {
+            this.points = points;
+        }
+
+        /// <summary>
+        /// Gets the number of hyphenation points in the word.
+        /// </summary>
+        public virtual int Length
+        {
+            get
+            {
+                return points.Length;
+            }
+        }
+
+        /// <summary>
+        /// Gets the hyphenation points.
+        /// </summary>
+        public virtual int[] HyphenationPoints
+        {
+            get { return points; }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
new file mode 100644
index 0000000..287f6f3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
@@ -0,0 +1,581 @@
+\ufeffusing Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using System.Xml;
+
+namespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+	/// This tree structure stores the hyphenation patterns in an efficient way for
+	/// fast lookup. It provides the method to hyphenate a word.
+	/// 
+	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
+	/// </summary>
+	public class HyphenationTree : TernaryTree, IPatternConsumer
+    {
+
+        /// <summary>
+        /// value space: stores the interletter values
+        /// </summary>
+        protected internal ByteVector vspace;
+
+        /// <summary>
+        /// This map stores hyphenation exceptions
+        /// </summary>
+        protected internal IDictionary<string, IList<object>> stoplist;
+
+        /// <summary>
+        /// This map stores the character classes
+        /// </summary>
+        protected internal TernaryTree classmap;
+
+        /// <summary>
+        /// Temporary map to store interletter values on pattern loading.
+        /// </summary>
+        [NonSerialized]
+        private TernaryTree ivalues;
+
+        /// <summary>
+        /// Creates an empty hyphenation tree: no exceptions, no character classes
+        /// and a value space that only contains the reserved slot 0.
+        /// </summary>
+        public HyphenationTree()
+        {
+            // index 0 of the value space is reserved and never used for data
+            ByteVector values = new ByteVector();
+            values.Alloc(1);
+            this.vspace = values;
+
+            this.classmap = new TernaryTree();
+
+            // the exception table is usually small
+            this.stoplist = new HashMap<string, IList<object>>(23);
+        }
+
+        /// <summary>
+        /// Packs interletter values into the value space, two 4-bit values per
+        /// byte (first value in the high nibble). Values range from 0 to 9; since
+        /// zero is the terminator, each value is stored incremented by one.
+        /// </summary>
+        /// <param name="values"> a string of digits from '0' to '9' representing the
+        ///        interletter values. </param>
+        /// <returns> the index into the vspace array where the packed values are stored. </returns>
+        protected internal virtual int PackValues(string values)
+        {
+            int count = values.Length;
+
+            // one byte per pair of values, plus a terminator byte; an odd count
+            // needs an extra byte for the unpaired value
+            int byteCount = (count >> 1) + ((count & 1) == 1 ? 2 : 1);
+
+            int start = vspace.Alloc(byteCount);
+            sbyte[] store = vspace.Array; // fetched after Alloc, which may replace the array
+
+            for (int pos = 0; pos < count; pos++)
+            {
+                int slot = start + (pos >> 1);
+                sbyte nibble = (sbyte)((values[pos] - '0' + 1) & 0x0f);
+                bool isSecondOfPair = (pos & 1) == 1;
+                if (isSecondOfPair)
+                {
+                    store[slot] = (sbyte)(store[slot] | nibble);
+                }
+                else
+                {
+                    store[slot] = (sbyte)(nibble << 4); // big endian
+                }
+            }
+
+            store[start + byteCount - 1] = 0; // terminator
+            return start;
+        }
+
+        /// <summary>
+        /// Reverses <see cref="PackValues(string)"/>: reads the packed values that
+        /// start at index <paramref name="k"/> of the value space and returns them
+        /// as a string of digits.
+        /// </summary>
+        /// <param name="k"> index into the value space where the packed values start </param>
+        /// <returns> the interletter values as digit characters, without the terminator </returns>
+        protected internal virtual string UnpackValues(int k)
+        {
+            StringBuilder buf = new StringBuilder();
+            sbyte v = vspace[k++];
+            while (v != 0)
+            {
+                // v is signed: a high nibble of 8 or more (digits 7 to 9) makes the
+                // byte negative, and shifting the sign-extended value drags the
+                // sign bits into the result. Masking after the shift keeps only
+                // the four bits that were stored.
+                int high = (v >> 4) & 0x0f;
+                buf.Append((char)(high - 1 + '0'));
+
+                int low = v & 0x0f;
+                if (low == 0)
+                {
+                    // odd number of values: the low nibble is unused
+                    break;
+                }
+                buf.Append((char)(low - 1 + '0'));
+                v = vspace[k++];
+            }
+            return buf.ToString();
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file, decoding it as UTF-8.
+        /// </summary>
+        /// <param name="filename"> the path of the XML pattern file </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(string filename)
+        {
+            LoadPatterns(filename, Encoding.UTF8); // delegates to the (string, Encoding) overload
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file using the given encoding.
+        /// </summary>
+        /// <param name="filename"> the path of the XML pattern file </param>
+        /// <param name="encoding"> the character encoding used to decode the file </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(string filename, Encoding encoding)
+        {
+            // This method opens the stream, so it must also close it. The XmlReader
+            // built by the Stream overload is not configured to close its input, so
+            // without the using block the file handle stays open until finalization.
+            using (var src = new FileStream(filename, FileMode.Open, FileAccess.Read))
+            {
+                LoadPatterns(src, encoding);
+            }
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file, decoding it as UTF-8.
+        /// </summary>
+        /// <param name="f"> the XML pattern file </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(FileInfo f)
+        {
+            LoadPatterns(f, Encoding.UTF8); // delegates to the (FileInfo, Encoding) overload
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file using the given encoding.
+        /// </summary>
+        /// <param name="f"> the XML pattern file </param>
+        /// <param name="encoding"> the character encoding used to decode the file </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(FileInfo f, Encoding encoding)
+        {
+            // This method opens the stream, so it must also close it. The XmlReader
+            // built by the Stream overload is not configured to close its input, so
+            // without the using block the file handle stays open until finalization.
+            using (var src = new FileStream(f.FullName, FileMode.Open, FileAccess.Read))
+            {
+                LoadPatterns(src, encoding);
+            }
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from a stream of XML, decoding it as UTF-8.
+        /// </summary>
+        /// <param name="source"> the stream that supplies the XML pattern document </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(Stream source)
+        {
+            LoadPatterns(source, Encoding.UTF8); // delegates to the (Stream, Encoding) overload
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from a stream of XML using the given encoding.
+        /// </summary>
+        /// <param name="source"> the stream that supplies the XML pattern document </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(Stream source, Encoding encoding)
+        {
+            // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
+            using (var reader = XmlReader.Create(new StreamReader(source, encoding), new XmlReaderSettings
+            {
+                DtdProcessing = DtdProcessing.Parse, // DTDs are processed rather than rejected or ignored
+                XmlResolver = new PatternParser.DtdResolver() // NOTE(review): presumably resolves the bundled hyphenation.dtd - confirm in PatternParser
+            }))
+            {
+                LoadPatterns(reader); // NOTE(review): CloseInput is not set, so source presumably stays open after this - confirm
+            }
+        }
+
+        public virtual void LoadPatterns(XmlReader source) // reads the patterns from an already configured XML reader
+        {
+            PatternParser pp = new PatternParser(this); // the parser is given this tree (presumably as its pattern consumer)
+            ivalues = new TernaryTree(); // temporary map, only alive while loading
+
+            pp.Parse(source);
+
+            // patterns/values should now be in the tree,
+            // so release the unused capacity of the tree and its helper structures
+            TrimToSize();
+            vspace.TrimToSize();
+            classmap.TrimToSize();
+
+            // get rid of the auxiliary map (not reached if Parse throws)
+            ivalues = null;
+        }
+
+        /// <summary>
+        /// Looks up a pattern in the tree and returns its interletter values.
+        /// </summary>
+        /// <param name="pat"> the pattern to look up </param>
+        /// <returns> the unpacked values of the pattern as a string of digits, or
+        ///         the empty string when the lookup yields a negative result </returns>
+        public virtual string FindPattern(string pat)
+        {
+            // the value found for the pattern is used as an index into the value space
+            int valueIndex = base.Find(pat);
+            return valueIndex < 0 ? "" : UnpackValues(valueIndex);
+        }
+
+        /// <summary>
+        /// Compares two zero-terminated strings held in char arrays, starting at
+        /// the given offsets. Returns 0 if they are equal or if <paramref name="t"/>
+        /// ends first (i.e. t is a prefix of s); otherwise returns the difference
+        /// of the first pair of chars that differ.
+        /// </summary>
+        protected internal virtual int HStrCmp(char[] s, int si, char[] t, int ti)
+        {
+            while (true)
+            {
+                char left = s[si];
+                char right = t[ti];
+                if (left != right)
+                {
+                    // reaching the end of t counts as a match
+                    return right == 0 ? 0 : left - right;
+                }
+                if (left == 0)
+                {
+                    // both strings ended together
+                    return 0;
+                }
+                si++;
+                ti++;
+            }
+        }
+
+        /// <summary>
+        /// Reads the packed interletter values that start at index
+        /// <paramref name="k"/> of the value space and returns them as numbers,
+        /// one value per array element (the stored nibble minus one).
+        /// </summary>
+        /// <param name="k"> index into the value space where the packed values start </param>
+        /// <returns> the unpacked values, without the terminator </returns>
+        protected internal virtual sbyte[] GetValues(int k)
+        {
+            List<sbyte> res = new List<sbyte>();
+            sbyte v = vspace[k++];
+            while (v != 0)
+            {
+                // v is signed: a high nibble of 8 or more (values 7 to 9) makes the
+                // byte negative, and shifting the sign-extended value used to yield
+                // a negative result that SearchPatterns could never apply. Masking
+                // after the shift keeps only the four bits that were stored.
+                int high = (v >> 4) & 0x0f;
+                res.Add((sbyte)(high - 1));
+
+                int low = v & 0x0f;
+                if (low == 0)
+                {
+                    // odd number of values: the low nibble is unused
+                    break;
+                }
+                res.Add((sbyte)(low - 1));
+                v = vspace[k++];
+            }
+            return res.ToArray();
+        }
+
+        /// <summary>
+        /// <para>
+        /// Search for all possible partial matches of word starting at index and update
+        /// the interletter values. In other words, it does something like:
+        /// </para>
+        /// <code>
+        /// for(i=0; i&lt;patterns.length; i++) {
+        /// if ( word.substring(index).startsWith(patterns[i]) )
+        /// update_interletter_values(patterns[i]);
+        /// }
+        /// </code>
+        /// <para>
+        /// But it is done in an efficient way since the patterns are stored in a
+        /// ternary tree. In fact, this is the whole purpose of having the tree: doing
+        /// this search without having to test every single pattern. The number of
+        /// patterns for languages such as English ranges from 4000 to 10000. Thus,
+        /// doing thousands of string comparisons for each word to hyphenate would be
+        /// really slow without the tree. The tradeoff is memory, but using a ternary
+        /// tree instead of a trie almost halves the memory used by Lout or TeX.
+        /// It's also faster than using a hash table.
+        /// </para>
+        /// </summary>
+        /// <param name="word"> null terminated word to match </param>
+        /// <param name="index"> start index from word </param>
+        /// <param name="il"> interletter values array to update </param>
+        protected internal virtual void SearchPatterns(char[] word, int index, sbyte[] il)
+        {
+            sbyte[] values;
+            int i = index;
+            char p, q; // p: current tree node; q: probe used to look for a pattern ending at the current position
+            char sp = word[i]; // current char of the word
+            p = root;
+
+            while (p > 0 && p < sc.Length)
+            {
+                if (sc[p] == 0xFFFF) // compressed branch: the rest of the key is stored in kv at lo[p]
+                {
+                    if (HStrCmp(word, i, kv.Array, lo[p]) == 0)
+                    {
+                        values = GetValues(eq[p]); // data pointer is in eq[]
+                        int j = index;
+                        for (int k = 0; k < values.Length; k++)
+                        {
+                            if (j < il.Length && values[k] > il[j]) // keep the larger value per position
+                            {
+                                il[j] = values[k];
+                            }
+                            j++;
+                        }
+                    }
+                    return;
+                }
+                int d = sp - sc[p];
+                if (d == 0)
+                {
+                    if (sp == 0) // end of word reached
+                    {
+                        break;
+                    }
+                    sp = word[++i];
+                    p = eq[p];
+                    q = p;
+
+                    // look for a pattern ending at this position by searching for
+                    // the null char ( splitchar == 0 )
+                    while (q > 0 && q < sc.Length)
+                    {
+                        if (sc[q] == 0xFFFF) // stop at compressed branch
+                        {
+                            break;
+                        }
+                        if (sc[q] == 0)
+                        {
+                            values = GetValues(eq[q]);
+                            int j = index;
+                            for (int k = 0; k < values.Length; k++)
+                            {
+                                if (j < il.Length && values[k] > il[j]) // keep the larger value per position
+                                {
+                                    il[j] = values[k];
+                                }
+                                j++;
+                            }
+                            break;
+                        }
+                        else
+                        {
+                            q = lo[q];
+
+                            // Note carried over from the Java original:
+                            // actually the code should be: q = sc[q] < 0 ? hi[q] : lo[q]; but
+                            // java chars are unsigned (C# chars are unsigned as well), so the
+                            // null char can only be found by following the lo branch.
+                        }
+                    }
+                }
+                else
+                {
+                    p = d < 0 ? lo[p] : hi[p];
+                }
+            }
+        }
+
+        /// <summary>
+        /// Hyphenates a whole word given as a string by handing its characters to
+        /// the char-array overload.
+        /// </summary>
+        /// <param name="word"> the word to be hyphenated </param>
+        /// <param name="remainCharCount"> Minimum number of characters allowed before the
+        ///        hyphenation point. </param>
+        /// <param name="pushCharCount"> Minimum number of characters allowed after the
+        ///        hyphenation point. </param>
+        /// <returns> a <see cref="Hyphenation"/> object representing the
+        ///         hyphenated word or null if word is not hyphenated. </returns>
+        public virtual Hyphenation Hyphenate(string word, int remainCharCount, int pushCharCount)
+        {
+            char[] chars = word.ToCharArray();
+            int start = 0;
+            int count = chars.Length;
+            return Hyphenate(chars, start, count, remainCharCount, pushCharCount);
+        }
+
+        /// <summary>
+        /// Hyphenate word and return an array of hyphenation points.
+        /// </summary>
+        /// <remarks>
+        /// w = "****nnllllllnnn*****", where n is a non-letter, l is a letter, all n
+        /// may be absent, the first n is at offset, the first l is at offset +
+        /// iIgnoreAtBeginning; word = ".llllll.'\0'***", where all l in w are copied
+        /// into word. In the first part of the routine len = w.length, in the second
+        /// part of the routine len = word.length. Three indices are used: index(w),
+        /// the index in w, index(word), the index in word, letterindex(word), the
+        /// index in the letter part of word. The following relations exist:
+        /// index(w) = offset + i - 1;
+        /// index(word) = i - iIgnoreAtBeginning;
+        /// letterindex(word) = index(word) - 1 (see first loop).
+        /// It follows that:
+        /// index(w) - index(word) = offset - 1 + iIgnoreAtBeginning;
+        /// index(w) = letterindex(word) + offset + iIgnoreAtBeginning.
+        /// </remarks>
+        /// <param name="w"> char array that contains the word </param>
+        /// <param name="offset"> Offset to first character in word </param>
+        /// <param name="len"> Length of word </param>
+        /// <param name="remainCharCount"> Minimum number of characters allowed before the
+        ///        hyphenation point. </param>
+        /// <param name="pushCharCount"> Minimum number of characters allowed after the
+        ///        hyphenation point. </param>
+        /// <returns> a <see cref="Hyphenation"/> object representing the
+        ///         hyphenated word, or <c>null</c> if the word is not hyphenated: a letter
+        ///         follows a non-letter that itself followed a letter, the letter count is
+        ///         smaller than <paramref name="remainCharCount"/> + <paramref name="pushCharCount"/>,
+        ///         or no hyphenation point was found. </returns>
+        public virtual Hyphenation Hyphenate(char[] w, int offset, int len, int remainCharCount, int pushCharCount)
+        {
+            int i;
+            // room for the letters plus start marker, end marker and terminating 0
+            char[] word = new char[len + 3];
+
+            // normalize word
+            // c[1] is never written and so stays 0; only c[0] changes per character
+            char[] c = new char[2];
+            int iIgnoreAtBeginning = 0;
+            int iLength = len;
+            bool bEndOfLetters = false;
+            for (i = 1; i <= len; i++)
+            {
+                c[0] = w[offset + i - 1];
+                int nc = classmap.Find(c, 0);
+                if (nc < 0) // found a non-letter character ...
+                {
+                    if (i == (1 + iIgnoreAtBeginning))
+                    {
+                        // ... before any letter character
+                        iIgnoreAtBeginning++;
+                    }
+                    else
+                    {
+                        // ... after a letter character
+                        bEndOfLetters = true;
+                    }
+                    iLength--;
+                }
+                else
+                {
+                    if (!bEndOfLetters)
+                    {
+                        // store the class-normalized character; slot 0 is left for the start marker
+                        word[i - iIgnoreAtBeginning] = (char)nc;
+                    }
+                    else
+                    {
+                        // a letter after trailing non-letters: give up on this input
+                        return null;
+                    }
+                }
+            }
+            // from here on len counts only the letters copied into word
+            len = iLength;
+            if (len < (remainCharCount + pushCharCount))
+            {
+                // word is too short to be hyphenated
+                return null;
+            }
+            int[] result = new int[len + 1];
+            int k = 0; // number of hyphenation points collected in result
+
+            // check exception list first
+            string sw = new string(word, 1, len);
+            if (stoplist.ContainsKey(sw))
+            {
+                // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no =
+                // null)
+                IList<object> hw = stoplist[sw];
+                int j = 0;
+                for (i = 0; i < hw.Count; i++)
+                {
+                    object o = hw[i];
+                    // j = index(sw) = letterindex(word)?
+                    // result[k] = corresponding index(w)
+                    if (o is string)
+                    {
+                        // only string entries advance j; other entries are skipped
+                        j += ((string)o).Length;
+                        if (j >= remainCharCount && j < (len - pushCharCount))
+                        {
+                            result[k++] = j + iIgnoreAtBeginning;
+                        }
+                    }
+                }
+            }
+            else
+            {
+                // use algorithm to get hyphenation points
+                word[0] = '.'; // word start marker
+                word[len + 1] = '.'; // word end marker
+                word[len + 2] = (char)0; // null terminated
+                sbyte[] il = new sbyte[len + 3]; // initialized to zero
+                for (i = 0; i < len + 1; i++)
+                {
+                    SearchPatterns(word, i, il);
+                }
+
+                // hyphenation points are located where interletter value is odd
+                // i is letterindex(word),
+                // i + 1 is index(word),
+                // result[k] = corresponding index(w)
+                for (i = 0; i < len; i++)
+                {
+                    if (((il[i + 1] & 1) == 1) && i >= remainCharCount && i <= (len - pushCharCount))
+                    {
+                        result[k++] = i + iIgnoreAtBeginning;
+                    }
+                }
+            }
+
+            if (k > 0)
+            {
+                // trim result array
+                int[] res = new int[k + 2];
+                Array.Copy(result, 0, res, 1, k);
+                // We add the synthetical hyphenation points
+                // at the beginning and end of the word
+                res[0] = 0;
+                res[k + 1] = len;
+                return new Hyphenation(res);
+            }
+            else
+            {
+                return null;
+            }
+        }
+
+        /// <summary>
+        /// Registers a character class in the class map. Called back by
+        /// <see cref="PatternParser"/> while it reads the character classes.
+        /// Character classes define the valid word characters for hyphenation; a word
+        /// containing a character that belongs to no class is not hyphenated. A class
+        /// also defines how characters are normalized before they are compared with the
+        /// stored patterns: every member of the group maps to the group's first
+        /// character. Pattern files usually use only lower case characters, so the
+        /// class for letter 'a', for example, should be defined as "aA".
+        /// </summary>
+        /// <param name="chargroup"> the members of the class, normalization character
+        ///        first; an empty string is ignored </param>
+        public virtual void AddClass(string chargroup)
+        {
+            if (chargroup.Length == 0)
+            {
+                return;
+            }
+
+            char normalized = chargroup[0];
+            // Single-character lookup key followed by a 0 character; the same array is reused for every member.
+            char[] lookupKey = { '\0', '\0' };
+            foreach (char member in chargroup)
+            {
+                lookupKey[0] = member;
+                classmap.Insert(lookupKey, 0, normalized);
+            }
+        }
+
+        /// <summary>
+        /// Add an exception to the tree. It is used by the
+        /// <see cref="PatternParser"/> class as callback to store the
+        /// hyphenation exceptions. An existing entry for the same word is replaced.
+        /// </summary>
+        /// <param name="word"> normalized word, used as the lookup key </param>
+        /// <param name="hyphenatedword"> a list of alternating strings and
+        ///        <see cref="Hyphen"/> objects. The list is stored as-is (not copied). </param>
+        public virtual void AddException(string word, List<object> hyphenatedword)
+        {
+            stoplist[word] = hyphenatedword;
+        }
+
+        /// <summary>
+        /// Add a pattern to the tree. Mainly, to be used by the
+        /// <see cref="PatternParser"/> class as callback to add a pattern to
+        /// the tree.
+        /// </summary>
+        /// <param name="pattern"> the hyphenation pattern </param>
+        /// <param name="ivalue"> interletter weight values indicating the desirability and
+        ///        priority of hyphenating at a given point within the pattern. It
+        ///        should contain only digit characters. (i.e. '0' to '9'). </param>
+        public virtual void AddPattern(string pattern, string ivalue)
+        {
+            // Reuse the packed values if this value string was seen before.
+            int k = ivalues.Find(ivalue);
+            if (k <= 0)
+            {
+                // Not cached yet: pack the values and remember the result under the
+                // value string so that equal strings share one entry.
+                k = PackValues(ivalue);
+                ivalues.Insert(ivalue, (char)k);
+            }
+            // Store the pattern with k (narrowed to char) as its data value.
+            Insert(pattern, (char)k);
+        }
+
+        // public override void printStats(PrintStream @out)
+        // {
+        //@out.println("Value space size = " + Convert.ToString(vspace.length()));
+        //base.printStats(@out);
+
+        // }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs
new file mode 100644
index 0000000..069badd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs
@@ -0,0 +1,54 @@
+\ufeffusing System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// This interface is used to connect the XML pattern file parser to the
+    /// hyphenation tree.
+    /// <para/>
+    /// This interface has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/)
+    /// and has been slightly modified.
+    /// </summary>
+    public interface IPatternConsumer
+	{
+
+	  /// <summary>
+	  /// Add a character class. A character class defines characters that are
+	  /// considered equivalent for the purpose of hyphenation (e.g. "aA"). It
+	  /// usually means to ignore case.
+	  /// </summary>
+	  /// <param name="chargroup"> character group </param>
+	  void AddClass(string chargroup);
+
+	  /// <summary>
+	  /// Add a hyphenation exception. An exception replaces the result obtained by
+	  /// the algorithm for cases in which the algorithm fails or the user wants to
+	  /// provide their own hyphenation.
+	  /// </summary>
+	  /// <param name="word"> the word the exception applies to </param>
+	  /// <param name="hyphenatedword"> a list of alternating strings
+	  /// and <see cref="Hyphen"/> instances </param>
+	  void AddException(string word, List<object> hyphenatedword);
+
+	  /// <summary>
+	  /// Add hyphenation patterns.
+	  /// </summary>
+	  /// <param name="pattern"> the pattern </param>
+	  /// <param name="values"> interletter values expressed as a string of digit characters. </param>
+	  void AddPattern(string pattern, string values);
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
new file mode 100644
index 0000000..8c00d19
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
@@ -0,0 +1,483 @@
+\ufeffusing System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Xml;
+
+namespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// An <see cref="XmlReader"/>-based document handler that reads and parses
+    /// hyphenation patterns from an XML file.
+    /// 
+    /// LUCENENET: This class has been refactored from its Java counterpart to use XmlReader rather
+    /// than a SAX parser.
+    /// </summary>
+    public class PatternParser
+    {
+        // Which section of the pattern file is currently being read: one of the
+        // ELEM_* constants below, or 0 when outside all of them.
+        internal int currElement;
+
+        // Receiver of the parsed classes, exceptions and patterns.
+        internal IPatternConsumer consumer;
+
+        // Accumulates the characters of the word currently being read.
+        internal StringBuilder token;
+
+        // Parts (strings and Hyphen objects) of the exception currently being read.
+        internal List<object> exception;
+
+        // Character that marks a hyphenation point inside exception words;
+        // '-' unless a hyphen-char element overrides it.
+        internal char hyphenChar;
+
+        // NOTE(review): not referenced by the members of this class visible here.
+        internal string errMsg;
+
+        // Inside a "classes" element.
+        internal const int ELEM_CLASSES = 1;
+
+        // Inside an "exceptions" element.
+        internal const int ELEM_EXCEPTIONS = 2;
+
+        // Inside a "patterns" element.
+        internal const int ELEM_PATTERNS = 3;
+
+        // Inside a "hyphen" element.
+        internal const int ELEM_HYPHEN = 4;
+
+        /// <summary>
+        /// Creates a parser without a consumer. A consumer must be assigned through
+        /// <see cref="Consumer"/> before parsing, because the parse callbacks forward
+        /// every word to it.
+        /// </summary>
+        public PatternParser()
+        {
+            token = new StringBuilder();
+            hyphenChar = '-'; // default
+        }
+
+        /// <summary>
+        /// Creates a parser that reports everything it reads to <paramref name="consumer"/>.
+        /// </summary>
+        /// <param name="consumer"> receiver of the parsed classes, exceptions and patterns </param>
+        public PatternParser(IPatternConsumer consumer) : this()
+        {
+            this.consumer = consumer;
+        }
+
+        /// <summary>
+        /// Sets the consumer that receives the parsed classes, exceptions and patterns.
+        /// This property is write-only.
+        /// </summary>
+        public virtual IPatternConsumer Consumer
+        {
+            set
+            {
+                this.consumer = value;
+            }
+        }
+
+        /// <summary>
+        /// Parses the hyphenation pattern file with the given name. DTD processing is
+        /// enabled and DTD references are resolved through <see cref="DtdResolver"/>.
+        /// </summary>
+        /// <param name="filename"> the filename </param>
+        /// <exception cref="IOException"> In case of an exception while parsing </exception>
+        public virtual void Parse(string filename)
+        {
+            // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
+            var settings = new XmlReaderSettings
+            {
+                DtdProcessing = DtdProcessing.Parse,
+                XmlResolver = new DtdResolver()
+            };
+
+            using (XmlReader reader = XmlReader.Create(filename, settings))
+            {
+                Parse(reader);
+            }
+        }
+
+        /// <summary>
+        /// Parses a hyphenation pattern file, reading it as UTF-8.
+        /// </summary>
+        /// <param name="file"> the pattern file </param>
+        public virtual void Parse(FileInfo file)
+        {
+            Parse(file, Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Parses a hyphenation pattern file, decoding it with the given encoding.
+        /// DTD processing is enabled and DTD references are resolved through
+        /// <see cref="DtdResolver"/>. The file is closed again before this method returns.
+        /// </summary>
+        /// <param name="file"> the pattern file </param>
+        /// <param name="encoding"> the character encoding used to read <paramref name="file"/> </param>
+        public virtual void Parse(FileInfo file, Encoding encoding)
+        {
+            var settings = new XmlReaderSettings
+            {
+                DtdProcessing = DtdProcessing.Parse,
+                XmlResolver = new DtdResolver()
+            };
+
+            // An XmlReader created over a TextReader does not close that reader when it
+            // is disposed (CloseInput is false by default), so the StreamReader gets its
+            // own using block; otherwise the file handle would stay open until finalization.
+            using (var input = new StreamReader(file.FullName, encoding))
+            using (var src = XmlReader.Create(input, settings))
+            {
+                Parse(src);
+            }
+        }
+
+        /// <summary>
+        /// Parses a hyphenation pattern file from a stream. DTD processing is enabled
+        /// and DTD references are resolved through <see cref="DtdResolver"/>.
+        /// </summary>
+        /// <param name="xmlStream"> the stream containing the XML pattern data.
+        /// NOTE(review): CloseInput is not set on the reader settings, so the stream is
+        /// presumably left open for the caller to dispose - confirm. </param>
+        public virtual void Parse(Stream xmlStream)
+        {
+            using (var src = XmlReader.Create(xmlStream, new XmlReaderSettings
+            {
+                DtdProcessing = DtdProcessing.Parse,
+                XmlResolver = new DtdResolver()
+            }))
+            {
+                Parse(src);
+            }
+        }
+
+        /// <summary>
+        /// Parses a hyphenation pattern file by reading every remaining node of the
+        /// given reader and dispatching it to the element and character callbacks.
+        /// </summary>
+        /// <param name="source"> the <see cref="XmlReader"/> positioned at or before the document content </param>
+        /// <exception cref="IOException"> In case of an exception while parsing </exception>
+        public virtual void Parse(XmlReader source)
+        {
+            // Skip ahead to the first content node. That node itself is not dispatched,
+            // because Read() advances past it before the first ParseNode call
+            // (presumably it is the document's root element - confirm).
+            source.MoveToContent();
+            while (source.Read())
+            {
+                ParseNode(source);
+            }
+        }
+
+        // Dispatches the reader's current node to StartElement / Characters / EndElement.
+        // Node types other than Element, Text and EndElement are ignored.
+        private void ParseNode(XmlReader node)
+        {
+            XmlNodeType kind = node.NodeType;
+
+            if (kind == XmlNodeType.Element)
+            {
+                // Capture everything about the element before GetAttributes moves
+                // the reader onto the attribute nodes.
+                string ns = node.NamespaceURI;
+                string elementName = node.Name;
+                bool selfClosing = node.IsEmptyElement;
+                IDictionary<string, string> attributes = GetAttributes(node);
+                string rawName = string.Empty; // node.ReadOuterXml(); - not used, but was messing with the node pointer
+
+                this.StartElement(ns, elementName, rawName, attributes);
+                if (selfClosing)
+                {
+                    // A self-closing element produces no EndElement node, so close it here.
+                    this.EndElement(ns, elementName, rawName);
+                }
+            }
+            else if (kind == XmlNodeType.Text)
+            {
+                string text = node.Value;
+                this.Characters(text.ToCharArray(), 0, text.Length);
+            }
+            else if (kind == XmlNodeType.EndElement)
+            {
+                string ns = node.NamespaceURI;
+                string elementName = node.Name;
+                string rawName = string.Empty; // node.ReadOuterXml(); - not used, but was messing with the node pointer
+
+                this.EndElement(ns, elementName, rawName);
+            }
+        }
+
+        // Collects the attributes of the reader's current element into a name -> value map.
+        // The reader is left positioned on the last attribute it visited (if any).
+        private IDictionary<string, string> GetAttributes(XmlReader node)
+        {
+            var attributes = new Dictionary<string, string>();
+
+            if (node.MoveToFirstAttribute())
+            {
+                do
+                {
+                    attributes.Add(node.Name, node.Value);
+                }
+                while (node.MoveToNextAttribute());
+            }
+
+            return attributes;
+        }
+
+        /// <summary>
+        /// Extracts the next whitespace-delimited word from <paramref name="chars"/>,
+        /// consuming the characters it examines. Characters of a word that is not yet
+        /// terminated by whitespace are kept in the internal token buffer and are
+        /// prepended to the input of the next call.
+        /// </summary>
+        /// <param name="chars"> the pending input; consumed characters are removed from its front </param>
+        /// <returns> the next complete word, or <c>null</c> if the input ran out before
+        ///         a word was terminated by whitespace </returns>
+        protected internal virtual string ReadToken(StringBuilder chars)
+        {
+            string word;
+
+            // Strip leading whitespace. If a word was pending from a previous call,
+            // that whitespace terminates it.
+            int leading = 0;
+            while (leading < chars.Length && char.IsWhiteSpace(chars[leading]))
+            {
+                leading++;
+            }
+            if (leading > 0)
+            {
+                chars.Remove(0, leading);
+                if (token.Length > 0)
+                {
+                    word = token.ToString();
+                    token.Length = 0;
+                    return word;
+                }
+            }
+
+            // Move everything up to the next whitespace (or the end of the input)
+            // into the token buffer.
+            int end = 0;
+            while (end < chars.Length && !char.IsWhiteSpace(chars[end]))
+            {
+                end++;
+            }
+            bool terminated = end < chars.Length;
+            token.Append(chars.ToString(0, end));
+            chars.Remove(0, end);
+
+            if (terminated)
+            {
+                word = token.ToString();
+                token.Length = 0;
+                return word;
+            }
+
+            // The input is exhausted (chars is empty now); the partial word stays in
+            // the token buffer until more characters or an element boundary arrive.
+            return null;
+        }
+
+        /// <summary>
+        /// Returns <paramref name="word"/> with every digit character removed, i.e. the
+        /// letters of a pattern without its interletter values.
+        /// </summary>
+        protected internal static string GetPattern(string word)
+        {
+            var letters = new StringBuilder();
+            foreach (char ch in word)
+            {
+                if (char.IsDigit(ch))
+                {
+                    continue;
+                }
+                letters.Append(ch);
+            }
+            return letters.ToString();
+        }
+
+        /// <summary>
+        /// Builds a new list in which every string item of <paramref name="ex"/> is split
+        /// at the hyphen character: each occurrence yields the text collected so far
+        /// (possibly empty) followed by a <see cref="Hyphen"/> whose pre-break text is the
+        /// hyphen character. Trailing text is added only when it is not empty. Items
+        /// that are not strings are copied unchanged.
+        /// </summary>
+        protected internal virtual List<object> NormalizeException<T1>(List<T1> ex)
+        {
+            var normalized = new List<object>();
+            foreach (T1 element in ex)
+            {
+                object item = element;
+                string text = item as string;
+                if (text == null)
+                {
+                    normalized.Add(item);
+                    continue;
+                }
+
+                var segment = new StringBuilder();
+                foreach (char ch in text)
+                {
+                    if (ch == hyphenChar)
+                    {
+                        normalized.Add(segment.ToString());
+                        segment.Length = 0;
+                        // we use here hyphenChar which is not necessarily
+                        // the one to be printed
+                        normalized.Add(new Hyphen(hyphenChar.ToString(), null, null));
+                    }
+                    else
+                    {
+                        segment.Append(ch);
+                    }
+                }
+                if (segment.Length > 0)
+                {
+                    normalized.Add(segment.ToString());
+                }
+            }
+            return normalized;
+        }
+
+        /// <summary>
+        /// Concatenates the items of <paramref name="ex"/> into the plain word: string
+        /// items are appended as they are, and every other item is treated as a
+        /// <see cref="Hyphen"/> that contributes its no-break text when that is not null.
+        /// </summary>
+        protected internal virtual string GetExceptionWord<T1>(List<T1> ex)
+        {
+            var plainWord = new StringBuilder();
+            foreach (T1 element in ex)
+            {
+                object item = element;
+                if (item is string)
+                {
+                    plainWord.Append((string)item);
+                    continue;
+                }
+
+                Hyphen hyphen = (Hyphen)item;
+                if (hyphen.noBreak != null)
+                {
+                    plainWord.Append(hyphen.noBreak);
+                }
+            }
+            return plainWord.ToString();
+        }
+
+        /// <summary>
+        /// Produces the interletter value string of a pattern: one character per
+        /// position before each letter and after the last letter. A digit standing in
+        /// front of a letter is copied, a position without a digit yields '0'.
+        /// </summary>
+        protected internal static string GetInterletterValues(string pat)
+        {
+            // The appended dummy letter is a sentinel that makes the position after
+            // the last real letter show up in the result.
+            string padded = pat + "a";
+            var values = new StringBuilder();
+            int pos = 0;
+            while (pos < padded.Length)
+            {
+                char current = padded[pos];
+                if (char.IsDigit(current))
+                {
+                    values.Append(current);
+                    pos += 2; // skip the character this digit belongs to
+                }
+                else
+                {
+                    values.Append('0');
+                    pos += 1;
+                }
+            }
+            return values.ToString();
+        }
+
+        /// <summary>
+        /// LUCENENET specific resolver that serves "hyphenation.dtd" from a manifest
+        /// resource of this assembly instead of the file system. The resource name is
+        /// this type's namespace followed by ".hyphenation.dtd". Every other entity is
+        /// resolved by the base <see cref="XmlUrlResolver"/>.
+        /// </summary>
+        internal class DtdResolver : XmlUrlResolver
+        {
+            public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
+            {
+                const string dtdFilename = "hyphenation.dtd";
+
+                // Only the last path segment is compared (ignoring case).
+                string lastSegment = absoluteUri.Segments.LastOrDefault();
+                if (!dtdFilename.Equals(lastSegment, StringComparison.OrdinalIgnoreCase))
+                {
+                    return base.GetEntity(absoluteUri, role, ofObjectToReturn);
+                }
+
+                Type self = GetType();
+                string resourceName = string.Concat(self.Namespace, ".", dtdFilename);
+                return self.Assembly.GetManifestResourceStream(resourceName);
+            }
+        }
+
+        //
+        // ContentHandler methods
+        //
+
+        /// <summary>
+        /// Handles the start of an element (counterpart of the SAX
+        /// <c>ContentHandler.startElement</c> callback of the Java original). Selects the
+        /// current section for "classes", "patterns" and "exceptions", reads the hyphen
+        /// character from "hyphen-char" and records a <see cref="Hyphen"/> for "hyphen".
+        /// The token buffer is cleared afterwards in every case.
+        /// </summary>
+        /// <param name="uri"> namespace URI of the element (not used) </param>
+        /// <param name="local"> name of the element </param>
+        /// <param name="raw"> raw name of the element (not used) </param>
+        /// <param name="attrs"> the attributes of the element by name; attributes that
+        ///        are absent are treated as null </param>
+        public void StartElement(string uri, string local, string raw, IDictionary<string, string> attrs)
+        {
+            if (local.Equals("hyphen-char"))
+            {
+                string h;
+                // Only a single-character value replaces the hyphen character.
+                if (attrs.TryGetValue("value", out h) && h != null && h.Length == 1)
+                {
+                    hyphenChar = h[0];
+                }
+            }
+            else if (local.Equals("classes"))
+            {
+                currElement = ELEM_CLASSES;
+            }
+            else if (local.Equals("patterns"))
+            {
+                currElement = ELEM_PATTERNS;
+            }
+            else if (local.Equals("exceptions"))
+            {
+                currElement = ELEM_EXCEPTIONS;
+                exception = new List<object>();
+            }
+            else if (local.Equals("hyphen"))
+            {
+                if (token.Length > 0)
+                {
+                    // Text collected so far is the part of the word before this hyphen.
+                    exception.Add(token.ToString());
+                }
+
+                // Any of the three attributes may be omitted. The dictionary indexer
+                // would throw KeyNotFoundException for a missing one, so look each up
+                // with TryGetValue, which leaves the variable null when it is absent.
+                string pre, no, post;
+                attrs.TryGetValue("pre", out pre);
+                attrs.TryGetValue("no", out no);
+                attrs.TryGetValue("post", out post);
+                exception.Add(new Hyphen(pre, no, post));
+                currElement = ELEM_HYPHEN;
+            }
+            token.Length = 0;
+        }
+
+        /// <summary>
+        /// Handles the end of an element (counterpart of the SAX
+        /// <c>ContentHandler.endElement</c> callback of the Java original). A word still
+        /// pending in the token buffer is delivered to the consumer according to the
+        /// current section, then the current section is updated.
+        /// </summary>
+        /// <param name="uri"> namespace URI of the element (not used) </param>
+        /// <param name="local"> name of the element (not used) </param>
+        /// <param name="raw"> raw name of the element (not used) </param>
+        public void EndElement(string uri, string local, string raw)
+        {
+            if (token.Length > 0)
+            {
+                string word = token.ToString();
+                switch (currElement)
+                {
+                    case ELEM_CLASSES:
+                        consumer.AddClass(word);
+                        break;
+                    case ELEM_EXCEPTIONS:
+                        exception.Add(word);
+                        exception = NormalizeException(exception);
+                        // the consumer receives its own copy of the list
+                        consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
+                        break;
+                    case ELEM_PATTERNS:
+                        consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
+                        break;
+                    case ELEM_HYPHEN:
+                        // nothing to do
+                        break;
+                }
+                if (currElement != ELEM_HYPHEN)
+                {
+                    token.Length = 0;
+                }
+            }
+            // Leaving a hyphen element returns to the enclosing exceptions section;
+            // leaving any other element resets the section to "none".
+            if (currElement == ELEM_HYPHEN)
+            {
+                currElement = ELEM_EXCEPTIONS;
+            }
+            else
+            {
+                currElement = 0;
+            }
+        }
+
+        /// <summary>
+        /// Handles character data (counterpart of the SAX
+        /// <c>ContentHandler.characters</c> callback of the Java original). The text is
+        /// split into whitespace-delimited words, and every complete word is delivered
+        /// to the consumer according to the current section. A trailing word that is
+        /// not terminated by whitespace stays buffered for a later call.
+        /// </summary>
+        /// <param name="ch"> buffer holding the characters </param>
+        /// <param name="start"> index of the first character to use </param>
+        /// <param name="length"> number of characters to use </param>
+        public void Characters(char[] ch, int start, int length)
+        {
+            StringBuilder chars = new StringBuilder(length);
+            chars.Append(ch, start, length);
+            string word = ReadToken(chars);
+            while (word != null)
+            {
+                // System.out.println("\"" + word + "\"");
+                switch (currElement)
+                {
+                    case ELEM_CLASSES:
+                        consumer.AddClass(word);
+                        break;
+                    case ELEM_EXCEPTIONS:
+                        exception.Add(word);
+                        exception = NormalizeException(exception);
+                        // the consumer receives its own copy, so clearing below is safe
+                        consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
+                        exception.Clear();
+                        break;
+                    case ELEM_PATTERNS:
+                        consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
+                        break;
+                }
+                word = ReadToken(chars);
+            }
+
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
new file mode 100644
index 0000000..88cfd01
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
@@ -0,0 +1,816 @@
+\ufeffusing System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// <h2>Ternary Search Tree.</h2>
+    /// 
+    /// <para>
+    /// A ternary search tree is a hybrid between a binary tree and a digital search
+    /// tree (trie). Keys are limited to strings. A data value of type char is stored
+    /// in each leaf node. It can be used as an index (or pointer) to the data.
+    /// Branches that only contain one key are compressed to one node by storing a
+    /// pointer to the trailer substring of the key. This class is intended to serve
+    /// as base class or helper class to implement Dictionary collections or the
+    /// like. Ternary trees have some nice properties as the following: the tree can
+    /// be traversed in sorted order, partial matches (wildcard) can be implemented,
+    /// retrieval of all keys within a given distance from the target, etc. The
+    /// storage requirements are higher than a binary tree but a lot less than a
+    /// trie. Performance is comparable with a hash table, sometimes it outperforms a
+    /// hash function (most of the time can determine a miss faster than a hash).
+    /// </para>
+    /// 
+    /// <para>
+    /// The main purpose of this java port is to serve as a base for implementing
+    /// TeX's hyphenation algorithm (see The TeXBook, appendix H). Each language
+    /// requires from 5000 to 15000 hyphenation patterns which will be keys in this
+    /// tree. The strings patterns are usually small (from 2 to 5 characters), but
+    /// each char in the tree is stored in a node. Thus memory usage is the main
+    /// concern. We will sacrifice 'elegance' to keep memory requirements to the
+    /// minimum. Using java's char type as pointer (yes, I know pointer it is a
+    /// forbidden word in java) we can keep the size of the node to be just 8 bytes
+    /// (3 pointers and the data char). This gives room for about 65000 nodes. In my
+    /// tests the english patterns took 7694 nodes and the german patterns 10055
+    /// nodes, so I think we are safe.
+    /// </para>
+    /// 
+    /// <para>
+    /// All said, this is a map with strings as keys and char as value. Pretty
+    /// limited!. It can be extended to a general map by using the string
+    /// representation of an object and using the char value as an index to an array
+    /// that contains the object values.
+    /// </para>
+    /// 
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). It has been slightly modified.
+    /// </summary>
+
+    public class TernaryTree : ICloneable
+    {
+        // We use 4 arrays to represent a node. I guess I should have created a proper
+        // node class, but somehow Knuth's pascal code made me forget we now have a
+        // portable language with virtual memory management and automatic garbage
+        // collection! And now is kind of late, furthermore, if it ain't broken, don't
+        // fix it.
+
+        /// <summary>
+        /// Pointer to low branch and to rest of the key when it is stored directly in
+        /// this node (the two uses share one array because there are no unions).
+        /// </summary>
+        protected internal char[] lo;
+
+        /// <summary>
+        /// Pointer to high branch.
+        /// </summary>
+        protected internal char[] hi;
+
+        /// <summary>
+        /// Pointer to equal branch and to data when this node is a string terminator.
+        /// </summary>
+        protected internal char[] eq;
+
+        /// <summary>
+        /// <para>
+        /// The character stored in this node: splitchar. Two special values are
+        /// reserved:
+        /// </para>
+        /// <ul>
+        /// <li>0x0000 as string terminator</li>
+        /// <li>0xFFFF to indicate that the branch starting at this node is compressed</li>
+        /// </ul>
+        /// <para>
+        /// This shouldn't be a problem if we give the usual semantics to strings since
+        /// 0xFFFF is guaranteed not to be a Unicode character.
+        /// </para>
+        /// </summary>
+        protected internal char[] sc;
+
+        /// <summary>
+        /// This vector holds the trailing part of the keys when the branch is compressed.
+        /// </summary>
+        protected internal CharVector kv;
+
+        /// <summary>
+        /// Index of the root node; 0 means the tree has no nodes yet.
+        /// </summary>
+        protected internal char root;
+
+        /// <summary>
+        /// Index of the next unused slot in the node arrays; starts at 1 because
+        /// index 0 doubles as the "no node" marker.
+        /// </summary>
+        protected internal char freenode;
+
+        /// <summary>
+        /// Number of items in the tree.
+        /// </summary>
+        protected internal int length;
+
+        /// <summary>
+        /// Allocation size for the node arrays (initial size and growth step).
+        /// </summary>
+        protected internal const int BLOCK_SIZE = 2048;
+
+        /// <summary>
+        /// Creates an empty tree. All state is set up by the virtual
+        /// <see cref="Init"/> method, which therefore runs before any
+        /// derived-class constructor body.
+        /// </summary>
+        internal TernaryTree()
+        {
+            Init();
+        }
+
+        /// <summary>
+        /// Puts the tree into its empty state: fresh node arrays of
+        /// <see cref="BLOCK_SIZE"/> elements, a new key vector, no root,
+        /// zero items, and node index 1 as the first free slot.
+        /// </summary>
+        protected internal virtual void Init()
+        {
+            // storage
+            kv = new CharVector();
+            sc = new char[BLOCK_SIZE];
+            eq = new char[BLOCK_SIZE];
+            hi = new char[BLOCK_SIZE];
+            lo = new char[BLOCK_SIZE];
+
+            // bookkeeping
+            length = 0;
+            freenode = '\u0001';
+            root = '\0';
+        }
+
+        /// <summary>
+        /// Inserts <paramref name="key"/> with the associated value
+        /// <paramref name="val"/>. Branches are initially compressed, needing one
+        /// node per key plus the size of the string key. They are decompressed as
+        /// needed when another key with the same prefix is inserted. This saves a
+        /// lot of space, especially for long keys.
+        /// </summary>
+        /// <param name="key">Key to insert.</param>
+        /// <param name="val">Value stored for the key.</param>
+        public virtual void Insert(string key, char val)
+        {
+            int keyLength = key.Length;
+
+            // At most one node per key character plus one for the terminator
+            // can be created by a single insertion.
+            if (freenode + keyLength + 1 > eq.Length)
+            {
+                RedimNodeArrays(eq.Length + BLOCK_SIZE);
+            }
+
+            // Zero-terminated copy of the key. A new array is zero-filled, so the
+            // extra last element already is the terminator.
+            char[] terminated = new char[keyLength + 1];
+            key.CopyTo(0, terminated, 0, keyLength);
+
+            root = Insert(root, terminated, 0, val);
+        }
+
+        /// <summary>
+        /// Inserts the zero-terminated key that begins at index
+        /// <paramref name="start"/> of <paramref name="key"/>, associating it with
+        /// <paramref name="val"/>.
+        /// </summary>
+        /// <param name="key">Array containing the zero-terminated key.</param>
+        /// <param name="start">Index of the first character of the key.</param>
+        /// <param name="val">Value stored for the key.</param>
+        public virtual void Insert(char[] key, int start, char val)
+        {
+            // Measure the key from 'start', not from index 0. The array may hold
+            // other data (including a zero) before 'start', in which case measuring
+            // from 0 under-estimates the number of nodes this insertion can create
+            // and the node arrays are not grown in time.
+            int len = StrLen(key, start) + 1;
+            if (freenode + len > eq.Length)
+            {
+                RedimNodeArrays(eq.Length + BLOCK_SIZE);
+            }
+            root = Insert(root, key, start, val);
+        }
+
+        /// <summary>
+        /// The actual insertion function, recursive version. Inserts the
+        /// zero-terminated key starting at <paramref name="start"/> into the
+        /// subtree rooted at node <paramref name="p"/>.
+        /// </summary>
+        /// <param name="p">Index of the subtree root; 0 means "no node here yet".</param>
+        /// <param name="key">Array containing the zero-terminated key.</param>
+        /// <param name="start">Index in <paramref name="key"/> of the first character still to be placed.</param>
+        /// <param name="val">Value to store for the key.</param>
+        /// <returns>Index of the node that roots the subtree after the insertion.</returns>
+        private char Insert(char p, char[] key, int start, char val)
+        {
+            // number of key characters that remain from 'start' (terminator excluded)
+            int len = StrLen(key, start);
+            if (p == 0)
+            {
+                // this means there is no branch, this node will start a new branch.
+                // Instead of doing that, we store the key somewhere else and create
+                // only one node with a pointer to the key
+                p = freenode++;
+                eq[p] = val; // holds data
+                length++;
+                hi[p] = (char)0;
+                if (len > 0)
+                {
+                    sc[p] = (char)0xFFFF; // indicates branch is compressed
+                    lo[p] = (char)kv.Alloc(len + 1); // use 'lo' to hold pointer to key
+                    StrCpy(kv.Array, lo[p], key, start);
+                }
+                else
+                {
+                    // nothing left of the key: plain terminator node
+                    sc[p] = (char)0;
+                    lo[p] = (char)0;
+                }
+                return p;
+            }
+
+            if (sc[p] == 0xFFFF)
+            {
+                // branch is compressed: need to decompress
+                // this will generate garbage in the external key array
+                // but we can do some garbage collection later
+                char pp = freenode++;
+                lo[pp] = lo[p]; // previous pointer to key
+                eq[pp] = eq[p]; // previous pointer to data
+                lo[p] = (char)0;
+                if (len > 0)
+                {
+                    // p takes the first character of the stored key as its
+                    // splitchar; pp carries whatever remains of that key
+                    sc[p] = kv[lo[pp]];
+                    eq[p] = pp;
+                    lo[pp]++;
+                    if (kv[lo[pp]] == 0)
+                    {
+                        // key completely decompressed leaving garbage in key array
+                        lo[pp] = (char)0;
+                        sc[pp] = (char)0;
+                        hi[pp] = (char)0;
+                    }
+                    else
+                    {
+                        // we only got first char of key, rest is still there
+                        sc[pp] = (char)0xFFFF;
+                    }
+                }
+                else
+                {
+                    // In this case we can save a node by swapping the new node
+                    // with the compressed node
+                    sc[pp] = (char)0xFFFF;
+                    hi[p] = pp;
+                    sc[p] = (char)0;
+                    eq[p] = val;
+                    length++;
+                    return p;
+                }
+            }
+            // regular node: descend into the low, equal or high branch
+            char s = key[start];
+            if (s < sc[p])
+            {
+                lo[p] = Insert(lo[p], key, start, val);
+            }
+            else if (s == sc[p])
+            {
+                if (s != 0)
+                {
+                    eq[p] = Insert(eq[p], key, start + 1, val);
+                }
+                else
+                {
+                    // key already in tree, overwrite data
+                    eq[p] = val;
+                }
+            }
+            else
+            {
+                hi[p] = Insert(hi[p], key, start, val);
+            }
+            return p;
+        }
+
+        /// <summary>
+        /// Compares two zero-terminated character sequences, one starting at
+        /// <paramref name="startA"/> in <paramref name="a"/> and the other at
+        /// <paramref name="startB"/> in <paramref name="b"/>.
+        /// </summary>
+        /// <returns>
+        /// 0 when both sequences are equal up to and including the terminator;
+        /// otherwise the difference between the first pair of differing characters.
+        /// </returns>
+        public static int StrCmp(char[] a, int startA, char[] b, int startB)
+        {
+            int posA = startA;
+            int posB = startB;
+            while (a[posA] == b[posB])
+            {
+                if (a[posA] == 0)
+                {
+                    // both reached the terminator together
+                    return 0;
+                }
+                posA++;
+                posB++;
+            }
+            return a[posA] - b[posB];
+        }
+
+        /// <summary>
+        /// Compares a string with the zero-terminated character sequence that
+        /// starts at <paramref name="start"/> in <paramref name="a"/>.
+        /// </summary>
+        /// <returns>
+        /// 0 when they are equal; the difference of the first differing pair of
+        /// characters otherwise. When <paramref name="str"/> is a proper prefix of
+        /// the array sequence, the negated next array character is returned.
+        /// </returns>
+        public static int StrCmp(string str, char[] a, int start)
+        {
+            int pos = start;
+            foreach (char ch in str)
+            {
+                char other = a[pos];
+                if (ch != other)
+                {
+                    return ch - other;
+                }
+                if (other == 0)
+                {
+                    // both hold a zero at this position
+                    return 0;
+                }
+                pos++;
+            }
+
+            // The string is exhausted: equal if the array sequence ends here too,
+            // otherwise negative (negating a zero terminator yields 0).
+            return -a[pos];
+        }
+
+        /// <summary>
+        /// Copies the zero-terminated sequence starting at <paramref name="si"/>
+        /// in <paramref name="src"/> to <paramref name="dst"/> starting at
+        /// <paramref name="di"/>, and writes a terminating zero after it.
+        /// </summary>
+        public static void StrCpy(char[] dst, int di, char[] src, int si)
+        {
+            int to = di;
+            for (int from = si; src[from] != 0; from++, to++)
+            {
+                dst[to] = src[from];
+            }
+            dst[to] = '\0';
+        }
+
+        /// <summary>
+        /// Counts the characters from <paramref name="start"/> up to (not
+        /// including) the first zero character, or up to the end of the array
+        /// when no zero is found.
+        /// </summary>
+        public static int StrLen(char[] a, int start)
+        {
+            int end = start;
+            while (end < a.Length && a[end] != 0)
+            {
+                end++;
+            }
+            return end - start;
+        }
+
+        /// <summary>
+        /// Length of the zero-terminated sequence that starts at index 0.
+        /// </summary>
+        public static int StrLen(char[] a)
+        {
+            return StrLen(a, 0);
+        }
+
+        /// <summary>
+        /// Looks up <paramref name="key"/> by converting it to a zero-terminated
+        /// character array and delegating to the array-based overload.
+        /// </summary>
+        /// <returns>The value stored for the key, or -1 when the key is not found.</returns>
+        public virtual int Find(string key)
+        {
+            // One extra element: a new array is zero-filled, so the last slot
+            // already acts as the terminator.
+            char[] terminated = new char[key.Length + 1];
+            key.CopyTo(0, terminated, 0, key.Length);
+            return Find(terminated, 0);
+        }
+
+        /// <summary>
+        /// Looks up the zero-terminated key that starts at
+        /// <paramref name="start"/> in <paramref name="key"/>.
+        /// </summary>
+        /// <returns>The value stored for the key, or -1 when the key is not found.</returns>
+        public virtual int Find(char[] key, int start)
+        {
+            char node = root;
+            int pos = start;
+
+            while (node != 0)
+            {
+                char split = sc[node];
+                if (split == 0xFFFF)
+                {
+                    // Compressed branch: the rest of the only key below this node
+                    // is stored in kv, so a single comparison settles it.
+                    return StrCmp(key, pos, kv.Array, lo[node]) == 0 ? eq[node] : -1;
+                }
+
+                char ch = key[pos];
+                if (ch < split)
+                {
+                    node = lo[node];
+                }
+                else if (ch > split)
+                {
+                    node = hi[node];
+                }
+                else
+                {
+                    if (ch == 0)
+                    {
+                        // matched the terminator: eq holds the data
+                        return eq[node];
+                    }
+                    pos++;
+                    node = eq[node];
+                }
+            }
+            return -1;
+        }
+
+        /// <summary>
+        /// Tells whether <paramref name="key"/> is present, i.e. whether the
+        /// string-based <c>Find</c> returns a non-negative value for it.
+        /// </summary>
+        public virtual bool Knows(string key)
+        {
+            int found = Find(key);
+            return found >= 0;
+        }
+
+        /// <summary>
+        /// Replaces the four node arrays with arrays of <paramref name="newsize"/>
+        /// elements, keeping as many leading elements as fit.
+        /// </summary>
+        private void RedimNodeArrays(int newsize)
+        {
+            // number of elements carried over, derived from the current 'lo' array
+            int keep = lo.Length;
+            if (newsize < keep)
+            {
+                keep = newsize;
+            }
+
+            lo = ResizedCopy(lo, newsize, keep);
+            hi = ResizedCopy(hi, newsize, keep);
+            eq = ResizedCopy(eq, newsize, keep);
+            sc = ResizedCopy(sc, newsize, keep);
+        }
+
+        /// <summary>
+        /// Returns a new array of <paramref name="newsize"/> elements whose first
+        /// <paramref name="count"/> elements are copied from <paramref name="source"/>.
+        /// </summary>
+        private static char[] ResizedCopy(char[] source, int newsize, int count)
+        {
+            char[] target = new char[newsize];
+            Array.Copy(source, 0, target, 0, count);
+            return target;
+        }
+
+        /// <summary>
+        /// Number of items in the tree.
+        /// </summary>
+        public virtual int Length
+        {
+            get
+            {
+                return length;
+            }
+        }
+
+        /// <summary>
+        /// Creates a new <see cref="TernaryTree"/> holding copies of the node
+        /// arrays and of the key vector, together with the same root, free-node
+        /// index and item count.
+        /// </summary>
+        public object Clone()
+        {
+            return new TernaryTree
+            {
+                lo = (char[])this.lo.Clone(),
+                hi = (char[])this.hi.Clone(),
+                eq = (char[])this.eq.Clone(),
+                sc = (char[])this.sc.Clone(),
+                kv = (CharVector)this.kv.Clone(),
+                root = this.root,
+                freenode = this.freenode,
+                length = this.length
+            };
+        }
+
+        /// <summary>
+        /// Inserts the <paramref name="n"/> entries starting at
+        /// <paramref name="offset"/> by recursively inserting the median first and
+        /// then the medians of the lower and upper halves, and so on, in order to
+        /// get a balanced tree. The array of keys is assumed to be sorted in
+        /// ascending order.
+        /// </summary>
+        /// <param name="k">Keys.</param>
+        /// <param name="v">Values, parallel to <paramref name="k"/>.</param>
+        /// <param name="offset">Index of the first entry of the range.</param>
+        /// <param name="n">Number of entries in the range.</param>
+        protected internal virtual void InsertBalanced(string[] k, char[] v, int offset, int n)
+        {
+            if (n < 1)
+            {
+                return;
+            }
+
+            int half = n / 2; // n is at least 1 here
+            int median = half + offset;
+
+            Insert(k[median], v[median]);
+            InsertBalanced(k, v, offset, half);
+            InsertBalanced(k, v, median + 1, n - half - 1);
+        }
+
+        /// <summary>
+        /// Balances the tree for best search performance: all keys and values
+        /// are read out through an <see cref="Iterator"/>, the tree is reset with
+        /// <see cref="Init"/>, and the entries are put back median-first by
+        /// <c>InsertBalanced</c>.
+        /// </summary>
+        public virtual void Balance()
+        {
+            int count = length;
+            string[] keys = new string[count];
+            char[] values = new char[count];
+
+            Iterator entries = new Iterator(this);
+            for (int slot = 0; entries.MoveNext(); slot++)
+            {
+                values[slot] = entries.Value;
+                keys[slot] = entries.Current;
+            }
+
+            Init();
+            InsertBalanced(keys, values, 0, count);
+
+            // With uniform letter distribution sc[root] should now be around 'm'.
+        }
+
+        /// <summary>
+        /// Each node stores a character (splitchar) which is part of some key(s). In a
+        /// compressed branch (one that only contains a single string key) the trailer
+        /// of the key which is not already in nodes is stored externally in the kv
+        /// array. As items are inserted, key substrings decrease. Some substrings may
+        /// completely disappear when the whole branch is totally decompressed. The
+        /// tree is traversed to find the key substrings actually used. In addition,
+        /// duplicate substrings are removed using a map (implemented with a
+        /// TernaryTree!).
+        /// </summary>
+        public virtual void TrimToSize()
+        {
+            // Step 1: balance the tree for best performance.
+            Balance();
+
+            // Step 2: shrink the node arrays to the number of slots in use.
+            RedimNodeArrays(freenode);
+
+            // Step 3: rebuild the key vector so it only holds substrings that are
+            // still referenced. One slot is reserved at the start of the new vector
+            // before any key is copied into it.
+            CharVector compacted = new CharVector();
+            compacted.Alloc(1);
+            TernaryTree known = new TernaryTree();
+            Compact(compacted, known, root);
+            kv = compacted;
+            kv.TrimToSize();
+        }
+
+        /// <summary>
+        /// Walks the subtree rooted at <paramref name="p"/> and, for every
+        /// compressed node, re-points its key trailer into <paramref name="kx"/>.
+        /// A trailer that is already registered in <paramref name="map"/> is
+        /// reused instead of being copied again.
+        /// </summary>
+        /// <param name="kx">Key vector that receives the trailers still in use.</param>
+        /// <param name="map">Maps each trailer already copied to its offset in <paramref name="kx"/>.</param>
+        /// <param name="p">Index of the subtree root; 0 means there is nothing to do.</param>
+        private void Compact(CharVector kx, TernaryTree map, char p)
+        {
+            if (p == 0)
+            {
+                return;
+            }
+
+            if (sc[p] != 0xFFFF)
+            {
+                // Regular node: visit the children. A terminator node (sc == 0)
+                // keeps data, not a child, in eq, so that branch is skipped.
+                Compact(kx, map, lo[p]);
+                if (sc[p] != 0)
+                {
+                    Compact(kx, map, eq[p]);
+                }
+                Compact(kx, map, hi[p]);
+                return;
+            }
+
+            // Compressed node: lo[p] is the offset of the key trailer in kv.
+            int offset = map.Find(kv.Array, lo[p]);
+            if (offset < 0)
+            {
+                offset = kx.Alloc(StrLen(kv.Array, lo[p]) + 1);
+                StrCpy(kx.Array, offset, kv.Array, lo[p]);
+                map.Insert(kx.Array, offset, (char)offset);
+            }
+            lo[p] = (char)offset;
+        }
+
+        /// <summary>
+        /// Returns a new <see cref="Iterator"/> over the keys of this tree.
+        /// </summary>
+        public virtual IEnumerator<string> Keys()
+        {
+            Iterator keyIterator = new Iterator(this);
+            return keyIterator;
+        }
+
+        /// <summary>
+        /// Enumerator for TernaryTree
+        /// 
+        /// LUCENENET NOTE: This differs a bit from its Java counterpart to adhere to
+        /// .NET IEnumerator semantics. In Java, when the <see cref="Iterator"/> is
+        /// instantiated, it is already positioned at the first element. However,
+        /// to act like a .NET IEnumerator, the initial state is undefined and considered
+        /// to be before the first element until <see cref="MoveNext"/> is called, and
+        /// if a move took place it will return <c>true</c>;
+        /// </summary>
+        public class Iterator : IEnumerator<string>
+        {
+            /// <summary>
+            /// The tree whose keys are enumerated.
+            /// </summary>
+            private readonly TernaryTree outerInstance;
+
+
+            /// <summary>
+            /// current node index; -1 before the first move and once the
+            /// traversal has run out of nodes
+            /// </summary>
+            private int cur;
+
+            /// <summary>
+            /// current key
+            /// </summary>
+            private string curkey;
+
+            /// <summary>
+            /// Entry of the node stack: a node index together with a counter that
+            /// records how far the traversal has progressed below that node.
+            /// </summary>
+            internal class Item : ICloneable
+            {
+                internal char parent;
+                internal char child;
+
+                /// <summary>Creates an item with both members set to zero.</summary>
+                public Item()
+                    : this('\0', '\0')
+                {
+                }
+
+                /// <summary>Creates an item for node <paramref name="p"/> with counter <paramref name="c"/>.</summary>
+                public Item(char p, char c)
+                {
+                    child = c;
+                    parent = p;
+                }
+
+                /// <summary>Returns a new item carrying the same two values.</summary>
+                public object Clone()
+                {
+                    Item copy = new Item(parent, child);
+                    return copy;
+                }
+            }
+
+            /// <summary>
+            /// Node stack
+            /// </summary>
+            internal Stack<Item> ns;
+
+            /// <summary>
+            /// key stack implemented with a StringBuilder
+            /// </summary>
+            internal StringBuilder ks;
+
+            /// <summary>
+            /// Becomes <c>true</c> on the first call to <c>MoveNext</c>, which
+            /// positions the enumerator on the first key.
+            /// </summary>
+            private bool isInitialized = false;
+
+            /// <summary>
+            /// Creates an enumerator over the keys of <paramref name="outerInstance"/>.
+            /// The enumerator starts out positioned before the first key.
+            /// </summary>
+            public Iterator(TernaryTree outerInstance)
+            {
+                ks = new StringBuilder();
+                ns = new Stack<Item>();
+                cur = -1;
+                isInitialized = false;
+                this.outerInstance = outerInstance;
+            }
+
+            /// <summary>
+            /// Empties the node and key stacks, restarts at the root of the tree
+            /// and advances to the first key.
+            /// </summary>
+            public virtual void Rewind()
+            {
+                ks.Length = 0;
+                ns.Clear();
+                cur = outerInstance.root;
+                Run();
+            }
+
+            /// <summary>
+            /// Value stored for the current key, or the zero character when the
+            /// enumerator is not positioned on a node.
+            /// </summary>
+            public virtual char Value
+            {
+                get { return cur >= 0 ? outerInstance.eq[cur] : '\0'; }
+            }
+
+            /// <summary>
+            /// traverse upwards: determines the node from which the search for the
+            /// next key continues, updating the node stack and the key stack.
+            /// </summary>
+            /// <returns>Index of the node to continue from, or -1 when the node stack is exhausted.</returns>
+            internal virtual int Up()
+            {
+                Item i = new Item();
+                int res = 0;
+
+                if (ns.Count == 0)
+                {
+                    return -1;
+                }
+
+                // current node is a string terminator: continue with its low branch
+                if (cur != 0 && outerInstance.sc[cur] == 0)
+                {
+                    return outerInstance.lo[cur];
+                }
+
+                bool climb = true;
+
+                while (climb)
+                {
+                    i = ns.Pop();
+                    // 'child' counts the branches of 'parent' already taken
+                    i.child++;
+                    switch ((int)i.child)
+                    {
+                        case 1:
+                            if (outerInstance.sc[i.parent] != 0)
+                            {
+                                // go down the equal branch; the splitchar becomes
+                                // part of the key being built
+                                res = outerInstance.eq[i.parent];
+                                ns.Push((Item)i.Clone());
+                                ks.Append(outerInstance.sc[i.parent]);
+                            }
+                            else
+                            {
+                                // terminator node: eq holds data, so skip straight
+                                // to the high branch
+                                i.child++;
+                                ns.Push((Item)i.Clone());
+                                res = outerInstance.hi[i.parent];
+                            }
+                            climb = false;
+                            break;
+
+                        case 2:
+                            // go down the high branch
+                            res = outerInstance.hi[i.parent];
+                            ns.Push((Item)i.Clone());
+                            if (ks.Length > 0)
+                            {
+                                ks.Length = ks.Length - 1; // pop
+                            }
+                            climb = false;
+                            break;
+
+                        default:
+                            // all branches of this node done: keep climbing
+                            if (ns.Count == 0)
+                            {
+                                return -1;
+                            }
+                            climb = true;
+                            break;
+                    }
+                }
+                return res;
+            }
+
+            /// <summary>
+            /// traverse the tree to find next key; on success the key is stored in
+            /// <c>curkey</c> and <c>cur</c> is the node holding its data.
+            /// </summary>
+            /// <returns>0 when a key was found, -1 when there are no more keys.</returns>
+            internal virtual int Run()
+            {
+                if (cur == -1)
+                {
+                    return -1;
+                }
+
+                bool leaf = false;
+                while (true)
+                {
+                    // first go down on low branch until leaf or compressed branch
+                    while (cur != 0)
+                    {
+                        if (outerInstance.sc[cur] == 0xFFFF)
+                        {
+                            leaf = true;
+                            break;
+                        }
+                        // regular nodes are remembered on the stack so that Up()
+                        // can later visit their other branches
+                        ns.Push(new Item((char)cur, '\u0000'));
+                        if (outerInstance.sc[cur] == 0)
+                        {
+                            leaf = true;
+                            break;
+                        }
+                        cur = outerInstance.lo[cur];
+                    }
+                    if (leaf)
+                    {
+                        break;
+                    }
+                    // nothing found, go up one node and try again
+                    cur = Up();
+                    if (cur == -1)
+                    {
+                        return -1;
+                    }
+                }
+                // The current node should be a data node and
+                // the key should be in the key stack (at least partially)
+                StringBuilder buf = new StringBuilder(ks.ToString());
+                if (outerInstance.sc[cur] == 0xFFFF)
+                {
+                    // compressed branch: append the key trailer stored in kv
+                    int p = outerInstance.lo[cur];
+                    while (outerInstance.kv[p] != 0)
+                    {
+                        buf.Append(outerInstance.kv[p++]);
+                    }
+                }
+                curkey = buf.ToString();
+                return 0;
+            }
+
+            #region Added for better .NET support
+            /// <summary>
+            /// The key found by the most recent successful move.
+            /// </summary>
+            public string Current
+            {
+                get { return curkey; }
+            }
+
+            /// <summary>
+            /// Non-generic view of <see cref="Current"/>.
+            /// </summary>
+            object IEnumerator.Current
+            {
+                get { return Current; }
+            }
+
+            /// <summary>
+            /// Does nothing; the enumerator holds no resources that need releasing.
+            /// </summary>
+            public void Dispose()
+            {
+            }
+
+            /// <summary>
+            /// Advances to the next key. The first call positions the enumerator on
+            /// the first key; later calls continue from the current node.
+            /// </summary>
+            /// <returns><c>true</c> when the enumerator is positioned on a key, <c>false</c> when there are no more keys.</returns>
+            public bool MoveNext()
+            {
+                if (isInitialized)
+                {
+                    if (cur == -1)
+                    {
+                        // already past the last key
+                        return false;
+                    }
+                    cur = Up();
+                    Run();
+                }
+                else
+                {
+                    Rewind();
+                    isInitialized = true;
+                }
+                return cur != -1;
+            }
+
+            /// <summary>
+            /// Not supported by this enumerator.
+            /// </summary>
+            /// <exception cref="NotSupportedException">Always thrown.</exception>
+            public void Reset()
+            {
+                throw new NotSupportedException();
+            }
+
+            #endregion
+        }
+
+        /// <summary>
+        /// Writes the number of keys, the node count and the length of the key
+        /// array to <paramref name="out"/>, one per line.
+        /// </summary>
+        /// <param name="out">Writer that receives the statistics.</param>
+        public virtual void PrintStats(TextWriter @out)
+        {
+            @out.WriteLine("Number of keys = " + Convert.ToString(length));
+            // freenode is a char used as a node index. Widen it to int so that the
+            // count is written as a number; Convert.ToString(char) would write the
+            // character with that code point instead.
+            @out.WriteLine("Node count = " + Convert.ToString((int)freenode));
+            // System.out.println("Array length = " + Integer.toString(eq.length));
+            @out.WriteLine("Key Array length = " + Convert.ToString(kv.Length()));
+
+            /*
+             * for(int i=0; i<kv.length(); i++) if ( kv.get(i) != 0 )
+             * System.out.print(kv.get(i)); else System.out.println("");
+             * System.out.println("Keys:"); for(Enumeration enum = keys();
+             * enum.hasMoreElements(); ) System.out.println(enum.nextElement());
+             */
+        }
+        /*
+          public static void main(String[] args) {
+            TernaryTree tt = new TernaryTree();
+            tt.insert("Carlos", 'C');
+            tt.insert("Car", 'r');
+            tt.insert("palos", 'l');
+            tt.insert("pa", 'p');
+            tt.trimToSize();
+            System.out.println((char) tt.find("Car"));
+            System.out.println((char) tt.find("Carlos"));
+            System.out.println((char) tt.find("alto"));
+            tt.printStats(System.out);
+          }
+          */
+
+    }
+}
\ No newline at end of file