You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2016/12/14 20:00:48 UTC
[2/4] lucenenet git commit: Renamed hyphenation to Hyphenation to fix build and run on case sensitive file systems

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/hyphenation.dtd
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/hyphenation.dtd b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/hyphenation.dtd
new file mode 100644
index 0000000..083c2bd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/hyphenation.dtd
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  Copyright 1999-2004 The Apache Software Foundation
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!-- $Id: hyphenation.dtd,v 1.3 2004/02/27 18:34:59 jeremias Exp $ -->
+
+<!ELEMENT hyphenation-info (hyphen-char?, hyphen-min?,
+                           classes, exceptions?, patterns)>
+
+<!-- Hyphen character to be used in the exception list as shortcut for
+     <hyphen pre-break="-"/>. Defaults to '-'
+-->
+<!ELEMENT hyphen-char EMPTY>
+<!ATTLIST hyphen-char value CDATA #REQUIRED>
+
+<!-- Default minimum length in characters of hyphenated word fragments
+     before and after the line break. For some languages this is not
+     only for aesthetic purposes, wrong hyphens may be generated if this
+     is not accounted for.
+-->
+<!ELEMENT hyphen-min EMPTY>
+<!ATTLIST hyphen-min before CDATA #REQUIRED>
+<!ATTLIST hyphen-min after CDATA #REQUIRED>
+
+<!-- Character equivalent classes: space separated list of character groups, all
+     characters in a group are to be treated equivalent as far as
+     the hyphenation algorithm is concerned. The first character in a group
+     is the group's equivalent character. Patterns should only contain
+     first characters. It also defines word characters, i.e. a word that
+     contains characters not present in any of the classes is not hyphenated.
+-->
+<!ELEMENT classes (#PCDATA)>
+
+<!-- Hyphenation exceptions: space separated list of hyphenated words.
+     A hyphen is indicated by the hyphen tag, but you can use the
+     hyphen-char defined previously as shortcut. This is in cases
+     when the algorithm procedure finds wrong hyphens or you want
+     to provide your own hyphenation for some words.
+-->
+<!ELEMENT exceptions (#PCDATA|hyphen)* >
+
+<!-- The hyphenation patterns, space separated. A pattern is made of 'equivalent'
+     characters as described before, between any two word characters a digit
+     in the range 0 to 9 may be specified. The absence of a digit is equivalent
+     to zero. The '.' character is reserved to indicate beginning or ending
+     of words. -->
+<!ELEMENT patterns (#PCDATA)>
+
+<!-- A "full hyphen" equivalent to TeX's \discretionary
+     with pre-break, post-break and no-break attributes.
+     To be used in the exceptions list, the hyphen character is not
+     automatically added -->
+<!ELEMENT hyphen EMPTY>
+<!ATTLIST hyphen pre CDATA #IMPLIED>
+<!ATTLIST hyphen no CDATA #IMPLIED>
+<!ATTLIST hyphen post CDATA #IMPLIED>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
deleted file mode 100644
index 6442d11..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
+++ /dev/null
@@ -1,156 +0,0 @@
-\ufeffnamespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// A simple growable vector of signed bytes that exposes its backing array.
-    /// <para>
-    /// Storage is reserved with <see cref="Alloc(int)"/>, which returns the index of the first
-    /// reserved element; elements are then accessed through the indexer or <see cref="Array"/>.
-    /// </para>
-    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-    /// </summary>
-    public class ByteVector
-    {
-        /// <summary>
-        /// Capacity increment used when the caller does not supply a positive one.
-        /// </summary>
-        private const int DEFAULT_BLOCK_SIZE = 2048;
-
-        /// <summary>
-        /// Minimum number of elements added each time the backing array grows.
-        /// </summary>
-        private int blockSize;
-
-        /// <summary>
-        /// The encapsulated array
-        /// </summary>
-        private sbyte[] array;
-
-        /// <summary>
-        /// Points to the next free item; also the number of items reserved so far.
-        /// </summary>
-        private int n;
-
-        /// <summary>
-        /// Creates an empty vector using the default block size.
-        /// </summary>
-        public ByteVector() : this(DEFAULT_BLOCK_SIZE)
-        {
-        }
-
-        /// <summary>
-        /// Creates an empty vector whose initial capacity and growth increment are both
-        /// <paramref name="capacity"/>. A non-positive value selects the default block size.
-        /// </summary>
-        /// <param name="capacity"> initial capacity and growth increment </param>
-        public ByteVector(int capacity)
-        {
-            blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
-            array = new sbyte[blockSize];
-            n = 0;
-        }
-
-        /// <summary>
-        /// Wraps <paramref name="a"/> (the array is not copied) using the default block size.
-        /// The vector's <see cref="Length"/> starts at zero.
-        /// </summary>
-        /// <param name="a"> the array to use as backing storage </param>
-        public ByteVector(sbyte[] a) : this(a, DEFAULT_BLOCK_SIZE)
-        {
-        }
-
-        /// <summary>
-        /// Wraps <paramref name="a"/> (the array is not copied) and uses
-        /// <paramref name="capacity"/> as the growth increment. A non-positive value selects
-        /// the default block size. The vector's <see cref="Length"/> starts at zero.
-        /// </summary>
-        /// <param name="a"> the array to use as backing storage </param>
-        /// <param name="capacity"> growth increment </param>
-        public ByteVector(sbyte[] a, int capacity)
-        {
-            blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
-            array = a;
-            n = 0;
-        }
-
-        /// <summary>
-        /// The backing array. It is replaced by a new instance whenever the vector grows
-        /// or is trimmed, so callers should re-read it after <see cref="Alloc(int)"/> or
-        /// <see cref="TrimToSize()"/>.
-        /// </summary>
-        public virtual sbyte[] Array
-        {
-            get { return array; }
-        }
-
-        /// <summary>
-        /// LUCENENET indexer for .NET; reads or writes the backing array directly
-        /// (this replaces the Put/Get pair of the Java original).
-        /// </summary>
-        /// <param name="index"> position in the backing array </param>
-        /// <returns> the element stored at <paramref name="index"/> </returns>
-        public virtual sbyte this[int index]
-        {
-            get { return array[index]; }
-            set { array[index] = value; }
-        }
-
-        /// <summary>
-        /// return number of items in array
-        /// </summary>
-        public virtual int Length
-        {
-            get { return n; }
-        }
-
-        /// <summary>
-        /// returns current capacity of array
-        /// </summary>
-        public virtual int Capacity
-        {
-            get { return array.Length; }
-        }
-
-        /// <summary>
-        /// This is to implement memory allocation in the array. Like malloc().
-        /// Reserves <paramref name="size"/> consecutive items and grows the backing array
-        /// when they do not fit.
-        /// </summary>
-        /// <param name="size"> number of items to reserve </param>
-        /// <returns> the index of the first reserved item </returns>
-        public virtual int Alloc(int size)
-        {
-            int index = n;
-            int len = array.Length;
-            if (n + size >= len)
-            {
-                // Grow by one block as before, but never by less than the request needs:
-                // a single block is not enough when size exceeds blockSize. The "+ 1"
-                // keeps the original guarantee of at least one spare slot after growing.
-                int newLen = System.Math.Max(len + blockSize, n + size + 1);
-                sbyte[] aux = new sbyte[newLen];
-                System.Array.Copy(array, 0, aux, 0, len);
-                array = aux;
-            }
-            n += size;
-            return index;
-        }
-
-        /// <summary>
-        /// Shrinks the backing array so that its capacity equals <see cref="Length"/>.
-        /// </summary>
-        public virtual void TrimToSize()
-        {
-            if (n < array.Length)
-            {
-                sbyte[] aux = new sbyte[n];
-                System.Array.Copy(array, 0, aux, 0, n);
-                array = aux;
-            }
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
deleted file mode 100644
index 26fcea5..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
+++ /dev/null
@@ -1,171 +0,0 @@
-\ufeffusing System;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// A simple growable vector of chars that exposes its backing array.
-    /// <para>
-    /// Storage is reserved with <see cref="Alloc(int)"/>, which returns the index of the first
-    /// reserved element; elements are then accessed through the indexer or <see cref="Array"/>.
-    /// </para>
-    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-    /// </summary>
-    public class CharVector : ICloneable
-    {
-        /// <summary>
-        /// Capacity increment used when the caller does not supply a positive one.
-        /// </summary>
-        private const int DEFAULT_BLOCK_SIZE = 2048;
-
-        /// <summary>
-        /// Minimum number of elements added each time the backing array grows.
-        /// </summary>
-        private int blockSize;
-
-        /// <summary>
-        /// The encapsulated array
-        /// </summary>
-        private char[] array;
-
-        /// <summary>
-        /// Points to the next free item; also the number of items held.
-        /// </summary>
-        private int n;
-
-        /// <summary>
-        /// Creates an empty vector using the default block size.
-        /// </summary>
-        public CharVector() : this(DEFAULT_BLOCK_SIZE)
-        {
-        }
-
-        /// <summary>
-        /// Creates an empty vector whose initial capacity and growth increment are both
-        /// <paramref name="capacity"/>. A non-positive value selects the default block size.
-        /// </summary>
-        /// <param name="capacity"> initial capacity and growth increment </param>
-        public CharVector(int capacity)
-        {
-            blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
-            array = new char[blockSize];
-            n = 0;
-        }
-
-        /// <summary>
-        /// Wraps <paramref name="a"/> (the array is not copied) using the default block size.
-        /// All of its elements count as used, i.e. the length equals <c>a.Length</c>.
-        /// </summary>
-        /// <param name="a"> the array to use as backing storage </param>
-        public CharVector(char[] a) : this(a, DEFAULT_BLOCK_SIZE)
-        {
-        }
-
-        /// <summary>
-        /// Wraps <paramref name="a"/> (the array is not copied) and uses
-        /// <paramref name="capacity"/> as the growth increment. A non-positive value selects
-        /// the default block size. All elements of <paramref name="a"/> count as used.
-        /// </summary>
-        /// <param name="a"> the array to use as backing storage </param>
-        /// <param name="capacity"> growth increment </param>
-        public CharVector(char[] a, int capacity)
-        {
-            blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
-            array = a;
-            n = a.Length;
-        }
-
-        /// <summary>
-        /// Reset Vector but don't resize or clear elements
-        /// </summary>
-        public virtual void Clear()
-        {
-            n = 0;
-        }
-
-        /// <summary>
-        /// Creates an independent copy of this vector with the same block size and length.
-        /// </summary>
-        /// <returns> a new <see cref="CharVector"/> backed by its own array </returns>
-        public virtual object Clone()
-        {
-            // Copy the backing array: handing the same array to the clone would make
-            // writes through one vector visible in the other.
-            CharVector cv = new CharVector((char[])array.Clone(), blockSize);
-            cv.n = this.n;
-            return cv;
-        }
-
-        /// <summary>
-        /// The backing array. It is replaced by a new instance whenever the vector grows
-        /// or is trimmed, so callers should re-read it after <see cref="Alloc(int)"/> or
-        /// <see cref="TrimToSize()"/>.
-        /// </summary>
-        public virtual char[] Array
-        {
-            get { return array; }
-        }
-
-        /// <summary>
-        /// LUCENENET indexer for .NET; reads or writes the backing array directly
-        /// (this replaces the Put/Get pair of the Java original).
-        /// </summary>
-        /// <param name="index"> position in the backing array </param>
-        /// <returns> the element stored at <paramref name="index"/> </returns>
-        public virtual char this[int index]
-        {
-            get { return array[index]; }
-            set { array[index] = value; }
-        }
-
-        /// <summary>
-        /// return number of items in array
-        /// </summary>
-        public virtual int Length()
-        {
-            return n;
-        }
-
-        /// <summary>
-        /// returns current capacity of array
-        /// </summary>
-        public virtual int Capacity
-        {
-            get { return array.Length; }
-        }
-
-        /// <summary>
-        /// Reserves <paramref name="size"/> consecutive items and grows the backing array
-        /// when they do not fit.
-        /// </summary>
-        /// <param name="size"> number of items to reserve </param>
-        /// <returns> the index of the first reserved item </returns>
-        public virtual int Alloc(int size)
-        {
-            int index = n;
-            int len = array.Length;
-            if (n + size >= len)
-            {
-                // Grow by one block as before, but never by less than the request needs:
-                // a single block is not enough when size exceeds blockSize. The "+ 1"
-                // keeps the original guarantee of at least one spare slot after growing.
-                int newLen = Math.Max(len + blockSize, n + size + 1);
-                char[] aux = new char[newLen];
-                System.Array.Copy(array, 0, aux, 0, len);
-                array = aux;
-            }
-            n += size;
-            return index;
-        }
-
-        /// <summary>
-        /// Shrinks the backing array so that its capacity equals the current length.
-        /// </summary>
-        public virtual void TrimToSize()
-        {
-            if (n < array.Length)
-            {
-                char[] aux = new char[n];
-                System.Array.Copy(array, 0, aux, 0, n);
-                array = aux;
-            }
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
deleted file mode 100644
index 91009b1..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
+++ /dev/null
@@ -1,72 +0,0 @@
-\ufeffusing System.Text;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// Describes a single hyphen. A 'full' hyphen consists of three texts: pre-break,
-    /// post-break and no-break. When no line break is generated at the hyphen's position
-    /// the no-break text is used; otherwise the pre-break and post-break texts are used.
-    /// Usually pre-break is just the hyphen character and the other two are empty, but the
-    /// general form supports languages in which a word changes spelling when split across
-    /// lines, like german's 'backen' which hyphenates 'bak-ken'. The scheme comes from TeX.
-    /// 
-    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-    /// </summary>
-    public class Hyphen
-    {
-        /// <summary>Text used before the break when a line break is generated.</summary>
-        public string preBreak;
-
-        /// <summary>Text used when no line break is generated.</summary>
-        public string noBreak;
-
-        /// <summary>Text used after the break when a line break is generated.</summary>
-        public string postBreak;
-
-        /// <summary>
-        /// Creates a full hyphen from its three texts.
-        /// </summary>
-        internal Hyphen(string pre, string no, string post)
-        {
-            this.preBreak = pre;
-            this.noBreak = no;
-            this.postBreak = post;
-        }
-
-        /// <summary>
-        /// Creates a hyphen that only has a pre-break text; the other two texts are null.
-        /// </summary>
-        internal Hyphen(string pre) : this(pre, null, null)
-        {
-        }
-
-        /// <summary>
-        /// Returns "-" for a hyphen that consists solely of the pre-break text "-";
-        /// any other hyphen is rendered as <c>{pre}{post}{no}</c>, with null texts
-        /// rendered as empty.
-        /// </summary>
-        public override string ToString()
-        {
-            bool isPlainHyphen = preBreak == "-" && postBreak == null && noBreak == null;
-            if (isPlainHyphen)
-            {
-                return "-";
-            }
-
-            // string concatenation treats null operands as empty, like StringBuilder.Append
-            return "{" + preBreak + "}{" + postBreak + "}{" + noBreak + "}";
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
deleted file mode 100644
index fdbac29..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
+++ /dev/null
@@ -1,53 +0,0 @@
-\ufeffnamespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// Holds the hyphenation points computed for one word.
-    /// 
-    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
-    /// </summary>
-    public class Hyphenation
-    {
-        /// <summary>
-        /// The hyphenation points, stored exactly as passed to the constructor (not copied).
-        /// </summary>
-        private readonly int[] hyphenPoints;
-
-        /// <summary>
-        /// Creates a hyphenation result that wraps the given points array.
-        /// </summary>
-        /// <param name="points"> the hyphenation points of the word </param>
-        internal Hyphenation(int[] points)
-        {
-            this.hyphenPoints = points;
-        }
-
-        /// <returns> the hyphenation points </returns>
-        public virtual int[] HyphenationPoints
-        {
-            get { return this.hyphenPoints; }
-        }
-
-        /// <returns> the number of hyphenation points in the word </returns>
-        public virtual int Length
-        {
-            get
-            {
-                return this.hyphenPoints.Length;
-            }
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
deleted file mode 100644
index 287f6f3..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
+++ /dev/null
@@ -1,581 +0,0 @@
-\ufeffusing Lucene.Net.Support;
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Text;
-using System.Xml;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-	/// This tree structure stores the hyphenation patterns in an efficient way for
-	/// fast lookup. It provides the method to hyphenate a word.
-	/// 
-	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-	/// </summary>
-	public class HyphenationTree : TernaryTree, IPatternConsumer
-    {
-
-        /// <summary>
-        /// value space: stores the interletter values
-        /// </summary>
-        protected internal ByteVector vspace;
-
-        /// <summary>
-        /// This map stores hyphenation exceptions
-        /// </summary>
-        protected internal IDictionary<string, IList<object>> stoplist;
-
-        /// <summary>
-        /// This map stores the character classes
-        /// </summary>
-        protected internal TernaryTree classmap;
-
-        /// <summary>
-        /// Temporary map to store interletter values on pattern loading.
-        /// </summary>
-        [NonSerialized]
-        private TernaryTree ivalues;
-
-        /// <summary>
-        /// Creates an empty tree with a fresh value space, class map and exception table.
-        /// </summary>
-        public HyphenationTree()
-        {
-            vspace = new ByteVector();
-            vspace.Alloc(1); // reserve index 0 so that it is never handed out for packed values
-            classmap = new TernaryTree();
-            stoplist = new HashMap<string, IList<object>>(23); // the exception table is usually small
-        }
-
-        /// <summary>
-        /// Stores a string of interletter digits in the value space, two digits per byte
-        /// (4 bits each, first digit in the high nibble). Because zero is the terminator,
-        /// every digit is stored as its value plus one; a zero byte ends the sequence.
-        /// </summary>
-        /// <param name="values"> a string of digits from '0' to '9' representing the
-        ///        interletter values. </param>
-        /// <returns> the index into the vspace array where the packed values are stored. </returns>
-        protected internal virtual int PackValues(string values)
-        {
-            int digitCount = values.Length;
-            // one byte per pair of digits, plus a terminator byte; an odd count needs
-            // an extra byte for the unpaired digit
-            int byteCount = (digitCount >> 1) + ((digitCount & 1) == 1 ? 2 : 1);
-            int offset = vspace.Alloc(byteCount);
-            sbyte[] store = vspace.Array; // read after Alloc, which may replace the array
-
-            for (int pos = 0; pos < digitCount; pos++)
-            {
-                int slot = offset + (pos >> 1);
-                sbyte nibble = (sbyte)((values[pos] - '0' + 1) & 0x0f);
-                if ((pos & 1) == 0)
-                {
-                    store[slot] = (sbyte)(nibble << 4); // big endian
-                }
-                else
-                {
-                    store[slot] = (sbyte)(store[slot] | nibble);
-                }
-            }
-
-            store[offset + byteCount - 1] = 0; // terminator
-            return offset;
-        }
-
-        /// <summary>
-        /// Decodes the packed interletter values stored at index <paramref name="k"/> of the
-        /// value space back into a string of digits. This is the inverse of
-        /// <see cref="PackValues(string)"/>: each nibble holds a digit plus one, and a zero
-        /// nibble or zero byte terminates the sequence.
-        /// </summary>
-        /// <param name="k"> index into the vspace array where the packed values start </param>
-        /// <returns> the digits as a string of characters '0' to '9' </returns>
-        protected internal virtual string UnpackValues(int k)
-        {
-            StringBuilder buf = new StringBuilder();
-            sbyte v = vspace[k++];
-            while (v != 0)
-            {
-                // Mask after shifting: v is signed, so a high nibble of 8 or more makes it
-                // negative and an unmasked shift would drag the sign bits into the result.
-                int high = (v >> 4) & 0x0f;
-                buf.Append((char)(high - 1 + '0'));
-
-                int low = v & 0x0f;
-                if (low == 0)
-                {
-                    break; // odd number of digits: the low nibble is the terminator
-                }
-                buf.Append((char)(low - 1 + '0'));
-                v = vspace[k++];
-            }
-            return buf.ToString();
-        }
-
-        /// <summary>
-        /// Reads hyphenation patterns from the XML file with the given name, decoding it as UTF-8.
-        /// </summary>
-        /// <param name="filename"> the filename </param>
-        /// <exception cref="IOException"> In case the parsing fails </exception>
-        public virtual void LoadPatterns(string filename)
-        {
-            // delegate to the encoding-aware overload
-            LoadPatterns(filename, Encoding.UTF8);
-        }
-
-        /// <summary>
-        /// Reads hyphenation patterns from the XML file with the given name.
-        /// </summary>
-        /// <param name="filename"> the filename </param>
-        /// <param name="encoding"> the character encoding used to decode the file </param>
-        /// <exception cref="IOException"> In case the parsing fails </exception>
-        public virtual void LoadPatterns(string filename, Encoding encoding)
-        {
-            // This method opens the file, so it must also close it; the stream overload
-            // does not take ownership of the stream it is given.
-            using (var src = new FileStream(filename, FileMode.Open, FileAccess.Read))
-            {
-                LoadPatterns(src, encoding);
-            }
-        }
-
-        /// <summary>
-        /// Reads hyphenation patterns from the given XML file, decoding it as UTF-8.
-        /// </summary>
-        /// <param name="f"> the pattern file </param>
-        /// <exception cref="IOException"> In case the parsing fails </exception>
-        public virtual void LoadPatterns(FileInfo f)
-        {
-            // delegate to the encoding-aware overload
-            LoadPatterns(f, Encoding.UTF8);
-        }
-
-        /// <summary>
-        /// Reads hyphenation patterns from the given XML file.
-        /// </summary>
-        /// <param name="f"> the pattern file </param>
-        /// <param name="encoding"> the character encoding used to decode the file </param>
-        /// <exception cref="IOException"> In case the parsing fails </exception>
-        public virtual void LoadPatterns(FileInfo f, Encoding encoding)
-        {
-            // This method opens the file, so it must also close it; the stream overload
-            // does not take ownership of the stream it is given.
-            using (var src = new FileStream(f.FullName, FileMode.Open, FileAccess.Read))
-            {
-                LoadPatterns(src, encoding);
-            }
-        }
-
-        /// <summary>
-        /// Reads hyphenation patterns from a stream of XML, decoding it as UTF-8.
-        /// </summary>
-        /// <param name="source"> the stream to read the XML from </param>
-        /// <exception cref="IOException"> In case the parsing fails </exception>
-        public virtual void LoadPatterns(Stream source)
-        {
-            // delegate to the encoding-aware overload
-            LoadPatterns(source, Encoding.UTF8);
-        }
-
-        /// <summary>
-        /// Reads hyphenation patterns from a stream of XML using the given encoding.
-        /// The XML reader is created with DTD processing enabled and resolves external
-        /// entities through <c>PatternParser.DtdResolver</c>.
-        /// </summary>
-        /// <param name="source"> the stream to read the XML from </param>
-        /// <param name="encoding"> the character encoding used to decode the stream </param>
-        /// <exception cref="IOException"> In case the parsing fails </exception>
-        public virtual void LoadPatterns(Stream source, Encoding encoding)
-        {
-            // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
-            var text = new StreamReader(source, encoding);
-            var settings = new XmlReaderSettings
-            {
-                DtdProcessing = DtdProcessing.Parse,
-                XmlResolver = new PatternParser.DtdResolver()
-            };
-
-            using (var reader = XmlReader.Create(text, settings))
-            {
-                LoadPatterns(reader);
-            }
-        }
-
-        /// <summary>
-        /// Reads hyphenation patterns from an XML reader into this tree, then trims the
-        /// tree, the value space and the class map to their used size.
-        /// </summary>
-        /// <param name="source"> the reader positioned on the hyphenation XML </param>
-        public virtual void LoadPatterns(XmlReader source)
-        {
-            var parser = new PatternParser(this);
-
-            // auxiliary map, only needed while the patterns are being loaded
-            ivalues = new TernaryTree();
-            parser.Parse(source);
-
-            // the patterns/values are in the tree now; release unused capacity
-            TrimToSize();
-            vspace.TrimToSize();
-            classmap.TrimToSize();
-
-            // drop the auxiliary map
-            ivalues = null;
-        }
-
-        /// <summary>
-        /// Looks up a pattern in the tree and returns its interletter values as a string
-        /// of digits.
-        /// </summary>
-        /// <param name="pat"> the pattern to look up </param>
-        /// <returns> the unpacked values, or an empty string when the lookup returns a
-        ///         negative index </returns>
-        public virtual string FindPattern(string pat)
-        {
-            int index = base.Find(pat);
-            return index >= 0 ? UnpackValues(index) : "";
-        }
-
-        /// <summary>
-        /// Compares two null-terminated char sequences. Returns 0 when they are equal or
-        /// when <paramref name="t"/> ends at the first position where they differ;
-        /// otherwise returns the difference of the first pair of differing chars.
-        /// </summary>
-        /// <param name="s"> first char array </param>
-        /// <param name="si"> start index in <paramref name="s"/> </param>
-        /// <param name="t"> second char array </param>
-        /// <param name="ti"> start index in <paramref name="t"/> </param>
-        protected internal virtual int HStrCmp(char[] s, int si, char[] t, int ti)
-        {
-            while (s[si] == t[ti])
-            {
-                if (s[si] == 0)
-                {
-                    return 0; // both sequences ended together
-                }
-                si++;
-                ti++;
-            }
-
-            // first difference found: t running out still counts as a match
-            return t[ti] == 0 ? 0 : s[si] - t[ti];
-        }
-
-        /// <summary>
-        /// Decodes the packed interletter values stored at index <paramref name="k"/> of the
-        /// value space into their numeric form. Each nibble holds a value plus one, and a
-        /// zero nibble or zero byte terminates the sequence.
-        /// </summary>
-        /// <param name="k"> index into the vspace array where the packed values start </param>
-        /// <returns> the interletter values, one per array element </returns>
-        protected internal virtual sbyte[] GetValues(int k)
-        {
-            var unpacked = new List<sbyte>();
-            sbyte v = vspace[k++];
-            while (v != 0)
-            {
-                // Mask after shifting: v is signed, so a high nibble of 8 or more makes it
-                // negative and an unmasked shift would drag the sign bits into the result.
-                int high = (v >> 4) & 0x0f;
-                unpacked.Add((sbyte)(high - 1));
-
-                int low = v & 0x0f;
-                if (low == 0)
-                {
-                    break; // odd number of values: the low nibble is the terminator
-                }
-                unpacked.Add((sbyte)(low - 1));
-                v = vspace[k++];
-            }
-            return unpacked.ToArray();
-        }
-
-        /// <summary>
-        /// <para>
-        /// Search for all possible partial matches of word starting at index and update
-        /// interletter values. In other words, it does something like:
-        /// </para>
-        /// <code>
-        /// for(i=0; i&lt;patterns.length; i++) {
-        /// if ( word.substring(index).startsWith(patterns[i]) )
-        /// update_interletter_values(patterns[i]);
-        /// }
-        /// </code>
-        /// <para>
-        /// But it is done in an efficient way since the patterns are stored in a
-        /// ternary tree. In fact, this is the whole purpose of having the tree: doing
-        /// this search without having to test every single pattern. The number of
-        /// patterns for languages such as English range from 4000 to 10000. Thus,
-        /// doing thousands of string comparisons for each word to hyphenate would be
-        /// really slow without the tree. The tradeoff is memory, but using a ternary
-        /// tree instead of a trie, almost halves the memory used by Lout or TeX.
-        /// It's also faster than using a hash table
-        /// </para>
-        /// <para>
-        /// An interletter value is only ever raised: an entry of <paramref name="il"/> is
-        /// overwritten when the pattern's value at that position is larger, and positions
-        /// beyond the end of <paramref name="il"/> are ignored.
-        /// </para>
-        /// </summary>
-        /// <param name="word"> null terminated word to match </param>
-        /// <param name="index"> start index from word </param>
-        /// <param name="il"> interletter values array to update </param>
-        protected internal virtual void SearchPatterns(char[] word, int index, sbyte[] il)
-        {
-            // NOTE(review): root, sc, lo, hi, eq and kv are not declared in the visible part
-            // of this file; presumably they are the node arrays inherited from TernaryTree.
-            sbyte[] values;
-            int i = index;
-            char p, q;
-            char sp = word[i];
-            p = root;
-
-            while (p > 0 && p < sc.Length)
-            {
-                if (sc[p] == 0xFFFF)
-                {
-                    // compressed branch: compare the rest of the word against the text
-                    // stored in kv at lo[p]
-                    if (HStrCmp(word, i, kv.Array, lo[p]) == 0)
-                    {
-                        values = GetValues(eq[p]); // data pointer is in eq[]
-                        int j = index;
-                        for (int k = 0; k < values.Length; k++)
-                        {
-                            if (j < il.Length && values[k] > il[j])
-                            {
-                                il[j] = values[k];
-                            }
-                            j++;
-                        }
-                    }
-                    return;
-                }
-                int d = sp - sc[p];
-                if (d == 0)
-                {
-                    if (sp == 0)
-                    {
-                        break;
-                    }
-                    // current char matched: advance in the word and descend through eq
-                    sp = word[++i];
-                    p = eq[p];
-                    q = p;
-
-                    // look for a pattern ending at this position by searching for
-                    // the null char ( splitchar == 0 )
-                    while (q > 0 && q < sc.Length)
-                    {
-                        if (sc[q] == 0xFFFF) // stop at compressed branch
-                        {
-                            break;
-                        }
-                        if (sc[q] == 0)
-                        {
-                            values = GetValues(eq[q]);
-                            int j = index;
-                            for (int k = 0; k < values.Length; k++)
-                            {
-                                if (j < il.Length && values[k] > il[j])
-                                {
-                                    il[j] = values[k];
-                                }
-                                j++;
-                            }
-                            break;
-                        }
-                        else
-                        {
-                            q = lo[q];
-
-                            // The Java original notes that this should actually be:
-                            // q = sc[q] < 0 ? hi[q] : lo[q]; but java chars are
-                            // unsigned, so only the lo link is followed.
-                        }
-                    }
-                }
-                else
-                {
-                    // no match at this node: continue with the lower or higher sibling
-                    p = d < 0 ? lo[p] : hi[p];
-                }
-            }
-        }
-
-        /// <summary>
-        /// Hyphenates a word given as a string by handing its characters to the
-        /// char-array overload, covering the whole string.
-        /// </summary>
-        /// <param name="word"> the word to be hyphenated </param>
-        /// <param name="remainCharCount"> Minimum number of characters allowed before the
-        ///        hyphenation point. </param>
-        /// <param name="pushCharCount"> Minimum number of characters allowed after the
-        ///        hyphenation point. </param>
-        /// <returns> a <see cref="Hyphenation"/> object representing the
-        ///         hyphenated word or <c>null</c> if word is not hyphenated. </returns>
-        public virtual Hyphenation Hyphenate(string word, int remainCharCount, int pushCharCount)
-        {
-            char[] chars = word.ToCharArray();
-            int length = chars.Length;
-            return Hyphenate(chars, 0, length, remainCharCount, pushCharCount);
-        }
-
-        /// <summary>
-        /// Hyphenate word and return an array of hyphenation points.
-        /// </summary>
-        /// <remarks>
-        /// w = "****nnllllllnnn*****", where n is a non-letter, l is a letter, all n
-        /// may be absent, the first n is at offset, the first l is at offset +
-        /// iIgnoreAtBeginning; word = ".llllll.'\0'***", where all l in w are copied
-        /// into word. In the first part of the routine len = w.length, in the second
-        /// part of the routine len = word.length. Three indices are used: index(w),
-        /// the index in w, index(word), the index in word, letterindex(word), the
-        /// index in the letter part of word. The following relations exist:
-        /// index(w) = offset + i - 1;
-        /// index(word) = i - iIgnoreAtBeginning;
-        /// letterindex(word) = index(word) - 1 (see first loop).
-        /// It follows that:
-        /// index(w) - index(word) = offset - 1 + iIgnoreAtBeginning;
-        /// index(w) = letterindex(word) + offset + iIgnoreAtBeginning.
-        /// </remarks>
-        /// <param name="w"> char array that contains the word </param>
-        /// <param name="offset"> Offset to first character in word </param>
-        /// <param name="len"> Length of word </param>
-        /// <param name="remainCharCount"> Minimum number of characters allowed before the
-        ///        hyphenation point. </param>
-        /// <param name="pushCharCount"> Minimum number of characters allowed after the
-        ///        hyphenation point. </param>
-        /// <returns> a <see cref="Hyphenation"/> object representing the
-        ///         hyphenated word or null if word is not hyphenated. </returns>
-        public virtual Hyphenation Hyphenate(char[] w, int offset, int len, int remainCharCount, int pushCharCount)
-        {
-            int i;
-            // Room for the letters plus the start marker, end marker and null
-            // terminator that the pattern branch writes below.
-            char[] word = new char[len + 3];
-
-            // normalize word
-            // c[1] is never assigned and stays 0, so c is a one-character,
-            // null-terminated lookup key for classmap.
-            char[] c = new char[2];
-            int iIgnoreAtBeginning = 0;
-            int iLength = len;
-            bool bEndOfLetters = false;
-            for (i = 1; i <= len; i++)
-            {
-                c[0] = w[offset + i - 1];
-                int nc = classmap.Find(c, 0);
-                if (nc < 0) // found a non-letter character ...
-                {
-                    if (i == (1 + iIgnoreAtBeginning))
-                    {
-                        // ... before any letter character
-                        iIgnoreAtBeginning++;
-                    }
-                    else
-                    {
-                        // ... after a letter character
-                        bEndOfLetters = true;
-                    }
-                    iLength--;
-                }
-                else
-                {
-                    if (!bEndOfLetters)
-                    {
-                        // store the normalized character returned by classmap
-                        word[i - iIgnoreAtBeginning] = (char)nc;
-                    }
-                    else
-                    {
-                        // a letter follows a non-letter that came after letters:
-                        // the input is not a single word, so give up
-                        return null;
-                    }
-                }
-            }
-            // from here on len counts only the letters copied into word
-            len = iLength;
-            if (len < (remainCharCount + pushCharCount))
-            {
-                // word is too short to be hyphenated
-                return null;
-            }
-            int[] result = new int[len + 1];
-            int k = 0;
-
-            // check exception list first
-            string sw = new string(word, 1, len);
-            if (stoplist.ContainsKey(sw))
-            {
-                // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no =
-                // null)
-                IList<object> hw = stoplist[sw];
-                int j = 0;
-                for (i = 0; i < hw.Count; i++)
-                {
-                    object o = hw[i];
-                    // j = index(sw) = letterindex(word)?
-                    // result[k] = corresponding index(w)
-                    // Only string fragments advance j; other entries are skipped.
-                    if (o is string)
-                    {
-                        j += ((string)o).Length;
-                        // NOTE(review): this branch uses j < (len - pushCharCount) while the
-                        // pattern branch below uses i <= (len - pushCharCount) - confirm intended.
-                        if (j >= remainCharCount && j < (len - pushCharCount))
-                        {
-                            result[k++] = j + iIgnoreAtBeginning;
-                        }
-                    }
-                }
-            }
-            else
-            {
-                // use algorithm to get hyphenation points
-                word[0] = '.'; // word start marker
-                word[len + 1] = '.'; // word end marker
-                word[len + 2] = (char)0; // null terminated
-                sbyte[] il = new sbyte[len + 3]; // initialized to zero
-                // search for patterns starting at every position of the marked word
-                for (i = 0; i < len + 1; i++)
-                {
-                    SearchPatterns(word, i, il);
-                }
-
-                // hyphenation points are located where interletter value is odd
-                // i is letterindex(word),
-                // i + 1 is index(word),
-                // result[k] = corresponding index(w)
-                for (i = 0; i < len; i++)
-                {
-                    if (((il[i + 1] & 1) == 1) && i >= remainCharCount && i <= (len - pushCharCount))
-                    {
-                        result[k++] = i + iIgnoreAtBeginning;
-                    }
-                }
-            }
-
-            if (k > 0)
-            {
-                // trim result array
-                int[] res = new int[k + 2];
-                Array.Copy(result, 0, res, 1, k);
-                // We add the synthetical hyphenation points
-                // at the beginning and end of the word
-                res[0] = 0;
-                res[k + 1] = len;
-                return new Hyphenation(res);
-            }
-            else
-            {
-                // no hyphenation point found
-                return null;
-            }
-        }
-
-        /// <summary>
-        /// Registers a character class with the tree. <see cref="PatternParser"/> calls
-        /// this as a callback for every character class it reads.
-        /// Character classes define the valid word characters for hyphenation: a word
-        /// containing a character that belongs to no class is not hyphenated. A class
-        /// also normalizes its characters so that they can be compared with the stored
-        /// patterns. Pattern files usually use only lower case characters, so a class
-        /// for the letter 'a', for example, should be defined as "aA", the first
-        /// character being the normalization char.
-        /// </summary>
-        /// <param name="chargroup"> the characters of the class; an empty string is ignored </param>
-        public virtual void AddClass(string chargroup)
-        {
-            if (chargroup.Length == 0)
-            {
-                return;
-            }
-
-            char normalized = chargroup[0];
-            // One-character, null-terminated key that is reused for every member.
-            char[] lookupKey = { (char)0, (char)0 };
-            foreach (char member in chargroup)
-            {
-                lookupKey[0] = member;
-                classmap.Insert(lookupKey, 0, normalized);
-            }
-        }
-
-        /// <summary>
-        /// Add an exception to the tree. It is used by the
-        /// <see cref="PatternParser"/> class as callback to store the
-        /// hyphenation exceptions. An existing entry for the same word is replaced.
-        /// </summary>
-        /// <param name="word"> normalized word </param>
-        /// <param name="hyphenatedword"> a vector of alternating strings and
-        ///        <see cref="Hyphen"/> objects. </param>
-        public virtual void AddException(string word, List<object> hyphenatedword)
-        {
-            stoplist[word] = hyphenatedword;
-        }
-
-        /// <summary>
-        /// Stores a pattern in the tree. Mainly intended as the callback through which
-        /// the <see cref="PatternParser"/> class adds patterns.
-        /// </summary>
-        /// <param name="pattern"> the hyphenation pattern </param>
-        /// <param name="ivalue"> interletter weight values indicating the desirability and
-        ///        priority of hyphenating at a given point within the pattern. It
-        ///        should contain only digit characters. (i.e. '0' to '9'). </param>
-        public virtual void AddPattern(string pattern, string ivalue)
-        {
-            // Reuse the packed values when this value string has been seen before.
-            int valueIndex = ivalues.Find(ivalue);
-            bool alreadyPacked = valueIndex > 0;
-            if (!alreadyPacked)
-            {
-                valueIndex = PackValues(ivalue);
-                ivalues.Insert(ivalue, (char)valueIndex);
-            }
-
-            Insert(pattern, (char)valueIndex);
-        }
-
-        // public override void printStats(PrintStream @out)
-        // {
-        //@out.println("Value space size = " + Convert.ToString(vspace.length()));
-        //base.printStats(@out);
-
-        // }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
deleted file mode 100644
index 069badd..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
+++ /dev/null
@@ -1,54 +0,0 @@
-\ufeffusing System.Collections.Generic;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// Callback contract that connects the XML pattern file parser to the
-    /// hyphenation tree.
-    /// <para/>
-    /// This interface has been taken from the Apache FOP project
-    /// (http://xmlgraphics.apache.org/fop/) and has been slightly modified.
-    /// </summary>
-    public interface IPatternConsumer
-    {
-        /// <summary>
-        /// Add a character class. A character class defines characters that are
-        /// considered equivalent for the purpose of hyphenation (e.g. "aA"). It
-        /// usually means to ignore case.
-        /// </summary>
-        /// <param name="chargroup"> character group </param>
-        void AddClass(string chargroup);
-
-        /// <summary>
-        /// Add a hyphenation exception. An exception replaces the result obtained by
-        /// the algorithm for cases in which the algorithm fails or the user wants to
-        /// provide their own hyphenation.
-        /// </summary>
-        /// <param name="word"> the word the exception applies to </param>
-        /// <param name="hyphenatedword"> a list of alternating strings and
-        ///        <see cref="Hyphen"/> instances </param>
-        void AddException(string word, List<object> hyphenatedword);
-
-        /// <summary>
-        /// Add hyphenation patterns.
-        /// </summary>
-        /// <param name="pattern"> the pattern </param>
-        /// <param name="values"> interletter values expressed as a string of digit characters. </param>
-        void AddPattern(string pattern, string values);
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
deleted file mode 100644
index 8c00d19..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
+++ /dev/null
@@ -1,483 +0,0 @@
-\ufeffusing System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Text;
-using System.Xml;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     * 
-     *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// A XMLReader document handler to read and parse hyphenation patterns from a XML
-    /// file.
-    /// 
-    /// LUCENENET: This class has been refactored from its Java counterpart to use XmlReader rather
-    /// than a SAX parser.
-    /// </summary>
-    public class PatternParser
-    {
-        internal int currElement;
-
-        internal IPatternConsumer consumer;
-
-        internal StringBuilder token;
-
-        internal List<object> exception;
-
-        internal char hyphenChar;
-
-        internal string errMsg;
-
-        internal const int ELEM_CLASSES = 1;
-
-        internal const int ELEM_EXCEPTIONS = 2;
-
-        internal const int ELEM_PATTERNS = 3;
-
-        internal const int ELEM_HYPHEN = 4;
-
-        /// <summary>
-        /// Creates a parser with an empty token buffer and '-' as the hyphen
-        /// character. No consumer is set by this constructor.
-        /// </summary>
-        public PatternParser()
-        {
-            token = new StringBuilder();
-            hyphenChar = '-'; // default
-        }
-
-        /// <summary>
-        /// Creates a parser that reports to the given consumer.
-        /// </summary>
-        /// <param name="consumer"> the consumer stored for later callbacks </param>
-        public PatternParser(IPatternConsumer consumer) : this()
-        {
-            this.consumer = consumer;
-        }
-
-        /// <summary>
-        /// Sets the consumer whose AddClass, AddException and AddPattern methods
-        /// are called while parsing. This property is write-only.
-        /// </summary>
-        public virtual IPatternConsumer Consumer
-        {
-            set
-            {
-                this.consumer = value;
-            }
-        }
-
-        /// <summary>
-        /// Parses the hyphenation pattern file with the given name. DTD processing
-        /// is enabled and external entities are resolved through <see cref="DtdResolver"/>.
-        /// </summary>
-        /// <param name="filename"> the filename </param>
-        /// <exception cref="IOException"> In case of an exception while parsing </exception>
-        public virtual void Parse(string filename)
-        {
-            // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
-            var settings = new XmlReaderSettings
-            {
-                DtdProcessing = DtdProcessing.Parse,
-                XmlResolver = new DtdResolver()
-            };
-
-            using (XmlReader reader = XmlReader.Create(filename, settings))
-            {
-                Parse(reader);
-            }
-        }
-
-        /// <summary>
-        /// Parses a hyphenation pattern file, reading it as UTF-8.
-        /// </summary>
-        /// <param name="file"> the pattern file </param>
-        public virtual void Parse(FileInfo file)
-        {
-            Parse(file, Encoding.UTF8);
-        }
-
-        /// <summary>
-        /// Parses a hyphenation pattern file, decoding it with the given encoding.
-        /// DTD processing is enabled and external entities are resolved through
-        /// <see cref="DtdResolver"/>.
-        /// </summary>
-        /// <param name="file"> the pattern file </param>
-        /// <param name="encoding"> the character encoding used to read the file </param>
-        public virtual void Parse(FileInfo file, Encoding encoding)
-        {
-            var settings = new XmlReaderSettings
-            {
-                DtdProcessing = DtdProcessing.Parse,
-                XmlResolver = new DtdResolver()
-            };
-
-            // XmlReaderSettings.CloseInput defaults to false, so disposing the XmlReader
-            // does not close the StreamReader it wraps. Dispose the StreamReader
-            // explicitly so the file handle is released as soon as parsing ends.
-            using (var fileReader = new StreamReader(file.FullName, encoding))
-            using (var src = XmlReader.Create(fileReader, settings))
-            {
-                Parse(src);
-            }
-        }
-
-        /// <summary>
-        /// Parses hyphenation patterns from a stream. DTD processing is enabled and
-        /// external entities are resolved through <see cref="DtdResolver"/>.
-        /// </summary>
-        /// <param name="xmlStream"> the stream that contains the pattern XML </param>
-        public virtual void Parse(Stream xmlStream)
-        {
-            var settings = new XmlReaderSettings
-            {
-                DtdProcessing = DtdProcessing.Parse,
-                XmlResolver = new DtdResolver()
-            };
-
-            using (XmlReader reader = XmlReader.Create(xmlStream, settings))
-            {
-                Parse(reader);
-            }
-        }
-
-        /// <summary>
-        /// Parses a hyphenation pattern document from an already created reader.
-        /// The reader is not disposed by this method.
-        /// </summary>
-        /// <param name="source"> the <see cref="XmlReader"/> positioned at or before the document content </param>
-        /// <exception cref="IOException"> In case of an exception while parsing </exception>
-        public virtual void Parse(XmlReader source)
-        {
-            // Skip to the first content node, then hand every node read after it
-            // to ParseNode.
-            source.MoveToContent();
-            while (source.Read())
-            {
-                ParseNode(source);
-            }
-        }
-
-        /// <summary>
-        /// Translates the node the reader is currently positioned on into the matching
-        /// handler call: <see cref="StartElement"/>, <see cref="Characters"/> or
-        /// <see cref="EndElement"/>. Other node types are ignored.
-        /// </summary>
-        /// <param name="node"> the reader, positioned on the node to dispatch </param>
-        private void ParseNode(XmlReader node)
-        {
-            // The "raw" argument is not used by the handlers; reading the outer XML
-            // to fill it would move the reader, so an empty string is passed instead.
-            switch (node.NodeType)
-            {
-                case XmlNodeType.Element:
-                    {
-                        // Capture the element data before GetAttributes moves the reader.
-                        string elementUri = node.NamespaceURI;
-                        string elementName = node.Name;
-                        bool selfClosing = node.IsEmptyElement;
-                        IDictionary<string, string> attributes = GetAttributes(node);
-
-                        this.StartElement(elementUri, elementName, string.Empty, attributes);
-                        if (selfClosing)
-                        {
-                            // An empty element produces no separate EndElement node.
-                            this.EndElement(elementUri, elementName, string.Empty);
-                        }
-                        break;
-                    }
-
-                case XmlNodeType.Text:
-                    {
-                        string text = node.Value;
-                        this.Characters(text.ToCharArray(), 0, text.Length);
-                        break;
-                    }
-
-                case XmlNodeType.EndElement:
-                    this.EndElement(node.NamespaceURI, node.Name, string.Empty);
-                    break;
-            }
-        }
-
-        /// <summary>
-        /// Collects the attributes of the element the reader is positioned on.
-        /// </summary>
-        /// <param name="node"> the reader, positioned on an element </param>
-        /// <returns> a dictionary from attribute name to attribute value; empty when
-        ///         the element has no attributes </returns>
-        private IDictionary<string, string> GetAttributes(XmlReader node)
-        {
-            var result = new Dictionary<string, string>();
-            if (node.HasAttributes)
-            {
-                for (int i = 0; i < node.AttributeCount; i++)
-                {
-                    node.MoveToAttribute(i);
-                    result.Add(node.Name, node.Value);
-                }
-
-                // Put the reader back on the owning element so that callers see the
-                // element's Name / IsEmptyElement rather than the last attribute's.
-                node.MoveToElement();
-            }
-
-            return result;
-        }
-
-        /// <summary>
-        /// Extracts the next whitespace-delimited word from <paramref name="chars"/>,
-        /// removing the consumed characters from it. Characters of a word that is not
-        /// yet terminated by whitespace are kept in the token buffer and combined with
-        /// the input of the next call.
-        /// </summary>
-        /// <param name="chars"> the pending characters; consumed characters are removed </param>
-        /// <returns> the next complete word, or <c>null</c> when the input is exhausted
-        ///         without reaching the end of a word </returns>
-        protected internal virtual string ReadToken(StringBuilder chars)
-        {
-            // Strip leading whitespace. If a partial word is buffered, that
-            // whitespace terminates it.
-            int leading = 0;
-            while (leading < chars.Length && char.IsWhiteSpace(chars[leading]))
-            {
-                leading++;
-            }
-            if (leading > 0)
-            {
-                chars.Remove(0, leading);
-                if (token.Length > 0)
-                {
-                    string pending = token.ToString();
-                    token.Length = 0;
-                    return pending;
-                }
-            }
-
-            // Find the end of the next run of non-whitespace characters.
-            int end = 0;
-            while (end < chars.Length && !char.IsWhiteSpace(chars[end]))
-            {
-                end++;
-            }
-            bool terminated = end < chars.Length;
-
-            token.Append(chars.ToString(0, end));
-            chars.Remove(0, end);
-            if (terminated)
-            {
-                string word = token.ToString();
-                token.Length = 0;
-                return word;
-            }
-
-            // Not terminated: everything was consumed above, so chars is empty here.
-            token.Append(chars.ToString());
-            return null;
-        }
-
-        /// <summary>
-        /// Returns <paramref name="word"/> with every digit character removed.
-        /// </summary>
-        /// <param name="word"> a pattern word that may contain digits </param>
-        /// <returns> the non-digit characters of the word, in their original order </returns>
-        protected internal static string GetPattern(string word)
-        {
-            var letters = new StringBuilder();
-            foreach (char ch in word)
-            {
-                if (char.IsDigit(ch))
-                {
-                    continue;
-                }
-                letters.Append(ch);
-            }
-            return letters.ToString();
-        }
-
-        /// <summary>
-        /// Builds a new list in which every string entry of <paramref name="ex"/> is split
-        /// at the current hyphen character: the text fragments are added as strings and
-        /// each hyphen character becomes a <see cref="Hyphen"/> entry. Non-string
-        /// entries are copied unchanged.
-        /// </summary>
-        /// <param name="ex"> the raw exception entries </param>
-        /// <returns> a new list with the normalized entries </returns>
-        protected internal virtual List<object> NormalizeException<T1>(List<T1> ex)
-        {
-            var normalized = new List<object>();
-            foreach (T1 entry in ex)
-            {
-                object item = entry;
-                string text = item as string;
-                if (text == null)
-                {
-                    normalized.Add(item);
-                    continue;
-                }
-
-                var fragment = new StringBuilder();
-                foreach (char c in text)
-                {
-                    if (c == hyphenChar)
-                    {
-                        // Flush the text before the hyphen, even when it is empty.
-                        normalized.Add(fragment.ToString());
-                        fragment.Length = 0;
-                        // we use here hyphenChar which is not necessarily
-                        // the one to be printed
-                        normalized.Add(new Hyphen(new string(hyphenChar, 1), null, null));
-                    }
-                    else
-                    {
-                        fragment.Append(c);
-                    }
-                }
-                if (fragment.Length > 0)
-                {
-                    normalized.Add(fragment.ToString());
-                }
-            }
-            return normalized;
-        }
-
-        /// <summary>
-        /// Concatenates the entries of an exception list into a single word: string
-        /// entries are appended as they are, and <see cref="Hyphen"/> entries
-        /// contribute their noBreak value when it is not null.
-        /// </summary>
-        /// <param name="ex"> the exception entries (strings and <see cref="Hyphen"/> instances) </param>
-        /// <returns> the concatenated word </returns>
-        protected internal virtual string GetExceptionWord<T1>(List<T1> ex)
-        {
-            var word = new StringBuilder();
-            foreach (T1 entry in ex)
-            {
-                object item = entry;
-                string text = item as string;
-                if (text != null)
-                {
-                    word.Append(text);
-                    continue;
-                }
-
-                var noBreak = ((Hyphen)item).noBreak;
-                if (noBreak != null)
-                {
-                    word.Append(noBreak);
-                }
-            }
-            return word.ToString();
-        }
-
-        /// <summary>
-        /// Derives the string of interletter values from a pattern word. A digit
-        /// contributes itself and also consumes the character that follows it; any
-        /// other character contributes '0'. A dummy letter is appended to the word
-        /// first so that the position after the last character is covered as well.
-        /// </summary>
-        /// <param name="pat"> the pattern word, letters mixed with digits </param>
-        /// <returns> the interletter values as a string of digit characters </returns>
-        protected internal static string GetInterletterValues(string pat)
-        {
-            var values = new StringBuilder();
-            string padded = pat + "a"; // add dummy letter to serve as sentinel
-            int pos = 0;
-            while (pos < padded.Length)
-            {
-                char current = padded[pos];
-                if (char.IsDigit(current))
-                {
-                    values.Append(current);
-                    pos += 2; // also skip the character that follows the digit
-                }
-                else
-                {
-                    values.Append('0');
-                    pos += 1;
-                }
-            }
-            return values.ToString();
-        }
-
-        /// <summary>
-        /// LUCENENET specific helper class that serves "hyphenation.dtd" from an
-        /// embedded resource of this assembly instead of the file system. Every
-        /// other entity is resolved by the base <see cref="XmlUrlResolver"/>.
-        /// </summary>
-        internal class DtdResolver : XmlUrlResolver
-        {
-            public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
-            {
-                const string dtdFilename = "hyphenation.dtd";
-
-                // Only the last path segment is compared, ignoring case.
-                string requested = absoluteUri.Segments.LastOrDefault();
-                bool isHyphenationDtd = dtdFilename.Equals(requested, StringComparison.OrdinalIgnoreCase);
-                if (!isHyphenationDtd)
-                {
-                    return base.GetEntity(absoluteUri, role, ofObjectToReturn);
-                }
-
-                // The resource name is "<namespace of this type>.hyphenation.dtd".
-                Type self = GetType();
-                string resourceName = string.Concat(self.Namespace, ".", dtdFilename);
-                return self.Assembly.GetManifestResourceStream(resourceName);
-            }
-        }
-
-        //
-        // ContentHandler methods
-        //
-
-        /// <summary>
-        /// Handles the start of an element; the counterpart of the SAX
-        /// <c>ContentHandler.startElement</c> callback. Records which section
-        /// (classes, patterns, exceptions) is being read, picks up the hyphen
-        /// character from <c>hyphen-char</c>, and turns <c>hyphen</c> elements into
-        /// <see cref="Hyphen"/> entries of the current exception. The token buffer
-        /// is cleared at the end in every case.
-        /// </summary>
-        /// <param name="uri"> the namespace URI of the element (not used) </param>
-        /// <param name="local"> the element name </param>
-        /// <param name="raw"> the raw element name (not used) </param>
-        /// <param name="attrs"> the attributes of the element, by name </param>
-        public void StartElement(string uri, string local, string raw, IDictionary<string, string> attrs)
-        {
-            if (local.Equals("hyphen-char"))
-            {
-                string h = GetAttributeOrNull(attrs, "value");
-                if (h != null && h.Length == 1)
-                {
-                    hyphenChar = h[0];
-                }
-            }
-            else if (local.Equals("classes"))
-            {
-                currElement = ELEM_CLASSES;
-            }
-            else if (local.Equals("patterns"))
-            {
-                currElement = ELEM_PATTERNS;
-            }
-            else if (local.Equals("exceptions"))
-            {
-                currElement = ELEM_EXCEPTIONS;
-                exception = new List<object>();
-            }
-            else if (local.Equals("hyphen"))
-            {
-                if (token.Length > 0)
-                {
-                    exception.Add(token.ToString());
-                }
-                // A missing attribute must become null, as in the guarded "value"
-                // lookup above. The dictionary indexer would throw
-                // KeyNotFoundException instead.
-                exception.Add(new Hyphen(
-                    GetAttributeOrNull(attrs, "pre"),
-                    GetAttributeOrNull(attrs, "no"),
-                    GetAttributeOrNull(attrs, "post")));
-                currElement = ELEM_HYPHEN;
-            }
-            token.Length = 0;
-        }
-
-        /// <summary>
-        /// Returns the value stored under <paramref name="name"/>, or <c>null</c> when
-        /// the dictionary has no such key.
-        /// </summary>
-        private static string GetAttributeOrNull(IDictionary<string, string> attrs, string name)
-        {
-            string value;
-            return attrs.TryGetValue(name, out value) ? value : null;
-        }
-
-        /// <summary>
-        /// Handles the end of an element; the counterpart of the SAX
-        /// <c>ContentHandler.endElement</c> callback. A token that is still buffered
-        /// is delivered to the consumer according to the current section. Afterwards
-        /// the section state returns to "exceptions" when a <c>hyphen</c> element was
-        /// closed, and is reset to 0 otherwise.
-        /// </summary>
-        /// <param name="uri"> the namespace URI of the element (not used) </param>
-        /// <param name="local"> the element name (not used) </param>
-        /// <param name="raw"> the raw element name (not used) </param>
-        public void EndElement(string uri, string local, string raw)
-        {
-            if (token.Length > 0)
-            {
-                string word = token.ToString();
-                if (currElement == ELEM_CLASSES)
-                {
-                    consumer.AddClass(word);
-                }
-                else if (currElement == ELEM_EXCEPTIONS)
-                {
-                    exception.Add(word);
-                    exception = NormalizeException(exception);
-                    consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
-                }
-                else if (currElement == ELEM_PATTERNS)
-                {
-                    consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
-                }
-                // ELEM_HYPHEN: nothing to do
-
-                // Inside a hyphen element the token is left in the buffer.
-                if (currElement != ELEM_HYPHEN)
-                {
-                    token.Length = 0;
-                }
-            }
-
-            currElement = (currElement == ELEM_HYPHEN) ? ELEM_EXCEPTIONS : 0;
-        }
-
-        /// <summary>
-        /// Handles character data; the counterpart of the SAX
-        /// <c>ContentHandler.characters</c> callback. The text is split into
-        /// whitespace-delimited words through <see cref="ReadToken"/>, and each
-        /// complete word is delivered to the consumer according to the current section.
-        /// </summary>
-        /// <param name="ch"> the buffer that holds the characters </param>
-        /// <param name="start"> index of the first character to use </param>
-        /// <param name="length"> number of characters to use </param>
-        public void Characters(char[] ch, int start, int length)
-        {
-            var pending = new StringBuilder(length);
-            pending.Append(ch, start, length);
-
-            for (string word = ReadToken(pending); word != null; word = ReadToken(pending))
-            {
-                // System.out.println("\"" + word + "\"");
-                switch (currElement)
-                {
-                    case ELEM_CLASSES:
-                        consumer.AddClass(word);
-                        break;
-                    case ELEM_EXCEPTIONS:
-                        exception.Add(word);
-                        exception = NormalizeException(exception);
-                        consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
-                        // Start collecting the next exception from scratch.
-                        exception.Clear();
-                        break;
-                    case ELEM_PATTERNS:
-                        consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
-                        break;
-                }
-            }
-        }
-    }
-}
\ No newline at end of file