You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2016/12/14 20:00:48 UTC
[2/4] lucenenet git commit: Renamed hyphenation to Hyphenation to fix
build and run on case sensitive file systems
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/hyphenation.dtd
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/hyphenation.dtd b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/hyphenation.dtd
new file mode 100644
index 0000000..083c2bd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/hyphenation.dtd
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+ Copyright 1999-2004 The Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- $Id: hyphenation.dtd,v 1.3 2004/02/27 18:34:59 jeremias Exp $ -->
+
+<!ELEMENT hyphenation-info (hyphen-char?, hyphen-min?,
+ classes, exceptions?, patterns)>
+
+<!-- Hyphen character to be used in the exception list as a shortcut for
+ <hyphen pre="-"/>. Defaults to '-'.
+-->
+<!ELEMENT hyphen-char EMPTY>
+<!ATTLIST hyphen-char value CDATA #REQUIRED>
+
+<!-- Default minimum length in characters of hyphenated word fragments
+ before and after the line break. For some languages this is not
+ only for aesthetic purposes: wrong hyphens may be generated if this
+ is not accounted for.
+-->
+<!ELEMENT hyphen-min EMPTY>
+<!ATTLIST hyphen-min before CDATA #REQUIRED>
+<!ATTLIST hyphen-min after CDATA #REQUIRED>
+
+<!-- Character equivalent classes: space separated list of character groups, all
+ characters in a group are to be treated equivalent as far as
+ the hyphenation algorithm is concerned. The first character in a group
+ is the group's equivalent character. Patterns should only contain
+ first characters. It also defines word characters, i.e. a word that
+ contains characters not present in any of the classes is not hyphenated.
+-->
+<!ELEMENT classes (#PCDATA)>
+
+<!-- Hyphenation exceptions: space separated list of hyphenated words.
+ A hyphen is indicated by the hyphen tag, but you can use the
+ hyphen-char defined previously as a shortcut. Use this for cases
+ where the algorithm finds wrong hyphens or you want
+ to provide your own hyphenation for some words.
+-->
+<!ELEMENT exceptions (#PCDATA|hyphen)* >
+
+<!-- The hyphenation patterns, space separated. A pattern is made of 'equivalent'
+ characters as described before; between any two word characters a digit
+ in the range 0 to 9 may be specified. The absence of a digit is equivalent
+ to zero. The '.' character is reserved to indicate the beginning or ending
+ of words. -->
+<!ELEMENT patterns (#PCDATA)>
+
+<!-- A "full hyphen" equivalent to TeX's \discretionary
+ with pre-break, post-break and no-break texts, given by the
+ pre, post and no attributes respectively.
+ To be used in the exceptions list; the hyphen character is not
+ automatically added. -->
+<!ELEMENT hyphen EMPTY>
+<!ATTLIST hyphen pre CDATA #IMPLIED>
+<!ATTLIST hyphen no CDATA #IMPLIED>
+<!ATTLIST hyphen post CDATA #IMPLIED>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
deleted file mode 100644
index 6442d11..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
+++ /dev/null
@@ -1,156 +0,0 @@
-\ufeffnamespace Lucene.Net.Analysis.Compound.Hyphenation
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// This class implements a simple byte vector with access to the underlying
- /// array.
- /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
- /// </summary>
- public class ByteVector
- {
-
- /// <summary>
- /// Capacity increment size
- /// </summary>
- private const int DEFAULT_BLOCK_SIZE = 2048;
-
- private int blockSize;
-
- /// <summary>
- /// The encapsulated array
- /// </summary>
- private sbyte[] array;
-
- /// <summary>
- /// Points to next free item
- /// </summary>
- private int n;
-
- public ByteVector() : this(DEFAULT_BLOCK_SIZE)
- {
- }
-
- public ByteVector(int capacity)
- {
- if (capacity > 0)
- {
- blockSize = capacity;
- }
- else
- {
- blockSize = DEFAULT_BLOCK_SIZE;
- }
- array = new sbyte[blockSize];
- n = 0;
- }
-
- public ByteVector(sbyte[] a)
- {
- blockSize = DEFAULT_BLOCK_SIZE;
- array = a;
- n = 0;
- }
-
- public ByteVector(sbyte[] a, int capacity)
- {
- if (capacity > 0)
- {
- blockSize = capacity;
- }
- else
- {
- blockSize = DEFAULT_BLOCK_SIZE;
- }
- array = a;
- n = 0;
- }
-
- public virtual sbyte[] Array
- {
- get
- {
- return array;
- }
- }
-
- /// <summary>
- /// LUCENENET indexer for .NET
- /// </summary>
- /// <param name="index"></param>
- /// <returns></returns>
- public virtual sbyte this[int index]
- {
- get { return array[index]; }
- set { array[index] = value; }
- }
-
- /// <summary>
- /// return number of items in array
- /// </summary>
- public virtual int Length
- {
- get { return n; }
- }
-
- /// <summary>
- /// returns current capacity of array
- /// </summary>
- public virtual int Capacity
- {
- get { return array.Length; }
- }
-
- //public virtual void Put(int index, sbyte val)
- //{
- // array[index] = val;
- //}
-
- //public virtual sbyte Get(int index)
- //{
- // return array[index];
- //}
-
- /// <summary>
- /// This is to implement memory allocation in the array. Like malloc().
- /// </summary>
- public virtual int Alloc(int size)
- {
- int index = n;
- int len = array.Length;
- if (n + size >= len)
- {
- sbyte[] aux = new sbyte[len + blockSize];
- System.Array.Copy(array, 0, aux, 0, len);
- array = aux;
- }
- n += size;
- return index;
- }
-
- public virtual void TrimToSize()
- {
- if (n < array.Length)
- {
- sbyte[] aux = new sbyte[n];
- System.Array.Copy(array, 0, aux, 0, n);
- array = aux;
- }
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
deleted file mode 100644
index 26fcea5..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
+++ /dev/null
@@ -1,171 +0,0 @@
-\ufeffusing System;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// This class implements a simple char vector with access to the underlying
- /// array.
- ///
- /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
- /// </summary>
- public class CharVector : ICloneable
- {
-
- /// <summary>
- /// Capacity increment size
- /// </summary>
- private const int DEFAULT_BLOCK_SIZE = 2048;
-
- private int blockSize;
-
- /// <summary>
- /// The encapsulated array
- /// </summary>
- private char[] array;
-
- /// <summary>
- /// Points to next free item
- /// </summary>
- private int n;
-
- public CharVector() : this(DEFAULT_BLOCK_SIZE)
- {
- }
-
- public CharVector(int capacity)
- {
- if (capacity > 0)
- {
- blockSize = capacity;
- }
- else
- {
- blockSize = DEFAULT_BLOCK_SIZE;
- }
- array = new char[blockSize];
- n = 0;
- }
-
- public CharVector(char[] a)
- {
- blockSize = DEFAULT_BLOCK_SIZE;
- array = a;
- n = a.Length;
- }
-
- public CharVector(char[] a, int capacity)
- {
- if (capacity > 0)
- {
- blockSize = capacity;
- }
- else
- {
- blockSize = DEFAULT_BLOCK_SIZE;
- }
- array = a;
- n = a.Length;
- }
-
- /// <summary>
- /// Reset Vector but don't resize or clear elements
- /// </summary>
- public virtual void Clear()
- {
- n = 0;
- }
-
- public virtual object Clone()
- {
- CharVector cv = new CharVector(array, blockSize);
- cv.n = this.n;
- return cv;
- }
-
- public virtual char[] Array
- {
- get
- {
- return array;
- }
- }
-
- /// <summary>
- /// LUCENENET indexer for .NET
- /// </summary>
- /// <param name="index"></param>
- /// <returns></returns>
- public virtual char this[int index]
- {
- get { return array[index]; }
- set { array[index] = value; }
- }
-
- /// <summary>
- /// return number of items in array
- /// </summary>
- public virtual int Length()
- {
- return n;
- }
-
- /// <summary>
- /// returns current capacity of array
- /// </summary>
- public virtual int Capacity
- {
- get { return array.Length; }
- }
-
- //public virtual void Put(int index, char val)
- //{
- // array[index] = val;
- //}
-
- //public virtual char get(int index)
- //{
- // return array[index];
- //}
-
- public virtual int Alloc(int size)
- {
- int index = n;
- int len = array.Length;
- if (n + size >= len)
- {
- char[] aux = new char[len + blockSize];
- System.Array.Copy(array, 0, aux, 0, len);
- array = aux;
- }
- n += size;
- return index;
- }
-
- public virtual void TrimToSize()
- {
- if (n < array.Length)
- {
- char[] aux = new char[n];
- System.Array.Copy(array, 0, aux, 0, n);
- array = aux;
- }
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
deleted file mode 100644
index 91009b1..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
+++ /dev/null
@@ -1,72 +0,0 @@
-\ufeffusing System.Text;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// This class represents a hyphen. A 'full' hyphen is made of 3 parts: the
- /// pre-break text, post-break text and no-break. If no line-break is generated
- /// at this position, the no-break text is used, otherwise, pre-break and
- /// post-break are used. Typically, pre-break is equal to the hyphen character
- /// and the others are empty. However, this general scheme allows support for
- /// cases in some languages where words change spelling if they're split across
- /// lines, like german's 'backen' which hyphenates 'bak-ken'. BTW, this comes
- /// from TeX.
- ///
- /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
- /// </summary>
- public class Hyphen
- {
- public string preBreak;
-
- public string noBreak;
-
- public string postBreak;
-
- internal Hyphen(string pre, string no, string post)
- {
- preBreak = pre;
- noBreak = no;
- postBreak = post;
- }
-
- internal Hyphen(string pre)
- {
- preBreak = pre;
- noBreak = null;
- postBreak = null;
- }
-
- public override string ToString()
- {
- if (noBreak == null && postBreak == null && preBreak != null && preBreak.Equals("-"))
- {
- return "-";
- }
- StringBuilder res = new StringBuilder("{");
- res.Append(preBreak);
- res.Append("}{");
- res.Append(postBreak);
- res.Append("}{");
- res.Append(noBreak);
- res.Append('}');
- return res.ToString();
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
deleted file mode 100644
index fdbac29..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
+++ /dev/null
@@ -1,53 +0,0 @@
-\ufeffnamespace Lucene.Net.Analysis.Compound.Hyphenation
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// This class represents a hyphenated word.
- ///
- /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
- /// </summary>
- public class Hyphenation
- {
-
- private readonly int[] hyphenPoints;
-
- /// <summary>
- /// rawWord as made of alternating strings and <seealso cref="Hyphen"/> instances
- /// </summary>
- internal Hyphenation(int[] points)
- {
- hyphenPoints = points;
- }
-
- /// <returns> the number of hyphenation points in the word </returns>
- public virtual int Length
- {
- get { return hyphenPoints.Length; }
- }
-
- /// <returns> the hyphenation points </returns>
- public virtual int[] HyphenationPoints
- {
- get
- {
- return hyphenPoints;
- }
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
deleted file mode 100644
index 287f6f3..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
+++ /dev/null
@@ -1,581 +0,0 @@
-\ufeffusing Lucene.Net.Support;
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Text;
-using System.Xml;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// This tree structure stores the hyphenation patterns in an efficient way for
- /// fast lookup. It provides the provides the method to hyphenate a word.
- ///
- /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
- /// </summary>
- public class HyphenationTree : TernaryTree, IPatternConsumer
- {
-
- /// <summary>
- /// value space: stores the interletter values
- /// </summary>
- protected internal ByteVector vspace;
-
- /// <summary>
- /// This map stores hyphenation exceptions
- /// </summary>
- protected internal IDictionary<string, IList<object>> stoplist;
-
- /// <summary>
- /// This map stores the character classes
- /// </summary>
- protected internal TernaryTree classmap;
-
- /// <summary>
- /// Temporary map to store interletter values on pattern loading.
- /// </summary>
- [NonSerialized]
- private TernaryTree ivalues;
-
- public HyphenationTree()
- {
- stoplist = new HashMap<string, IList<object>>(23); // usually a small table
- classmap = new TernaryTree();
- vspace = new ByteVector();
- vspace.Alloc(1); // this reserves index 0, which we don't use
- }
-
- /// <summary>
- /// Packs the values by storing them in 4 bits, two values into a byte Values
- /// range is from 0 to 9. We use zero as terminator, so we'll add 1 to the
- /// value.
- /// </summary>
- /// <param name="values"> a string of digits from '0' to '9' representing the
- /// interletter values. </param>
- /// <returns> the index into the vspace array where the packed values are stored. </returns>
- protected internal virtual int PackValues(string values)
- {
- int i, n = values.Length;
- int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1;
- int offset = vspace.Alloc(m);
- sbyte[] va = vspace.Array;
- for (i = 0; i < n; i++)
- {
- int j = i >> 1;
- sbyte v = (sbyte)((values[i] - '0' + 1) & 0x0f);
- if ((i & 1) == 1)
- {
- va[j + offset] = (sbyte)(va[j + offset] | v);
- }
- else
- {
- va[j + offset] = (sbyte)(v << 4); // big endian
- }
- }
- va[m - 1 + offset] = 0; // terminator
- return offset;
- }
-
- protected internal virtual string UnpackValues(int k)
- {
- StringBuilder buf = new StringBuilder();
- sbyte v = vspace[k++];
- while (v != 0)
- {
- char c = (char)(((int)((uint)v >> 4)) - 1 + '0');
- buf.Append(c);
- c = (char)(v & 0x0f);
- if (c == 0)
- {
- break;
- }
- c = (char)(c - 1 + '0');
- buf.Append(c);
- v = vspace[k++];
- }
- return buf.ToString();
- }
-
- /// <summary>
- /// Read hyphenation patterns from an XML file.
- /// </summary>
- /// <param name="f"> the filename </param>
- /// <exception cref="IOException"> In case the parsing fails </exception>
- public virtual void LoadPatterns(string filename)
- {
- LoadPatterns(filename, Encoding.UTF8);
- }
-
- /// <summary>
- /// Read hyphenation patterns from an XML file.
- /// </summary>
- /// <param name="f"> the filename </param>
- /// <exception cref="IOException"> In case the parsing fails </exception>
- public virtual void LoadPatterns(string filename, Encoding encoding)
- {
- var src = new FileStream(filename, FileMode.Open, FileAccess.Read);
- LoadPatterns(src, encoding);
- }
-
- /// <summary>
- /// Read hyphenation patterns from an XML file.
- /// </summary>
- /// <param name="f"> the filename </param>
- /// <exception cref="IOException"> In case the parsing fails </exception>
- public virtual void LoadPatterns(FileInfo f)
- {
- LoadPatterns(f, Encoding.UTF8);
- }
-
- /// <summary>
- /// Read hyphenation patterns from an XML file.
- /// </summary>
- /// <param name="f"> the filename </param>
- /// <exception cref="IOException"> In case the parsing fails </exception>
- public virtual void LoadPatterns(FileInfo f, Encoding encoding)
- {
- var src = new FileStream(f.FullName, FileMode.Open, FileAccess.Read);
- LoadPatterns(src, encoding);
- }
-
- /// <summary>
- /// Read hyphenation patterns from an XML file.
- /// </summary>
- /// <param name="source"> the InputSource for the file </param>
- /// <exception cref="IOException"> In case the parsing fails </exception>
- public virtual void LoadPatterns(Stream source)
- {
- LoadPatterns(source, Encoding.UTF8);
- }
-
- /// <summary>
- /// Read hyphenation patterns from an XML file.
- /// </summary>
- /// <param name="source"> the InputSource for the file </param>
- /// <exception cref="IOException"> In case the parsing fails </exception>
- public virtual void LoadPatterns(Stream source, Encoding encoding)
- {
- // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
- using (var reader = XmlReader.Create(new StreamReader(source, encoding), new XmlReaderSettings
- {
- DtdProcessing = DtdProcessing.Parse,
- XmlResolver = new PatternParser.DtdResolver()
- }))
- {
- LoadPatterns(reader);
- }
- }
-
- public virtual void LoadPatterns(XmlReader source)
- {
- PatternParser pp = new PatternParser(this);
- ivalues = new TernaryTree();
-
- pp.Parse(source);
-
- // patterns/values should be now in the tree
- // let's optimize a bit
- TrimToSize();
- vspace.TrimToSize();
- classmap.TrimToSize();
-
- // get rid of the auxiliary map
- ivalues = null;
- }
-
- public virtual string FindPattern(string pat)
- {
- int k = base.Find(pat);
- if (k >= 0)
- {
- return UnpackValues(k);
- }
- return "";
- }
-
- /// <summary>
- /// String compare, returns 0 if equal or t is a substring of s
- /// </summary>
- protected internal virtual int HStrCmp(char[] s, int si, char[] t, int ti)
- {
- for (; s[si] == t[ti]; si++, ti++)
- {
- if (s[si] == 0)
- {
- return 0;
- }
- }
- if (t[ti] == 0)
- {
- return 0;
- }
- return s[si] - t[ti];
- }
-
- protected internal virtual sbyte[] GetValues(int k)
- {
- StringBuilder buf = new StringBuilder();
- sbyte v = vspace[k++];
- while (v != 0)
- {
- char c = (char)((((int)((uint)v >> 4))) - 1);
- buf.Append(c);
- c = (char)(v & 0x0f);
- if (c == 0)
- {
- break;
- }
- c = (char)(c - 1);
- buf.Append(c);
- v = vspace[k++];
- }
- sbyte[] res = new sbyte[buf.Length];
- for (int i = 0; i < res.Length; i++)
- {
- res[i] = (sbyte)buf[i];
- }
- return res;
- }
-
- /// <summary>
- /// <para>
- /// Search for all possible partial matches of word starting at index an update
- /// interletter values. In other words, it does something like:
- /// </para>
- /// <code>
- /// for(i=0; i<patterns.length; i++) {
- /// if ( word.substring(index).startsWidth(patterns[i]) )
- /// update_interletter_values(patterns[i]);
- /// }
- /// </code>
- /// <para>
- /// But it is done in an efficient way since the patterns are stored in a
- /// ternary tree. In fact, this is the whole purpose of having the tree: doing
- /// this search without having to test every single pattern. The number of
- /// patterns for languages such as English range from 4000 to 10000. Thus,
- /// doing thousands of string comparisons for each word to hyphenate would be
- /// really slow without the tree. The tradeoff is memory, but using a ternary
- /// tree instead of a trie, almost halves the the memory used by Lout or TeX.
- /// It's also faster than using a hash table
- /// </para>
- /// </summary>
- /// <param name="word"> null terminated word to match </param>
- /// <param name="index"> start index from word </param>
- /// <param name="il"> interletter values array to update </param>
- protected internal virtual void SearchPatterns(char[] word, int index, sbyte[] il)
- {
- sbyte[] values;
- int i = index;
- char p, q;
- char sp = word[i];
- p = root;
-
- while (p > 0 && p < sc.Length)
- {
- if (sc[p] == 0xFFFF)
- {
- if (HStrCmp(word, i, kv.Array, lo[p]) == 0)
- {
- values = GetValues(eq[p]); // data pointer is in eq[]
- int j = index;
- for (int k = 0; k < values.Length; k++)
- {
- if (j < il.Length && values[k] > il[j])
- {
- il[j] = values[k];
- }
- j++;
- }
- }
- return;
- }
- int d = sp - sc[p];
- if (d == 0)
- {
- if (sp == 0)
- {
- break;
- }
- sp = word[++i];
- p = eq[p];
- q = p;
-
- // look for a pattern ending at this position by searching for
- // the null char ( splitchar == 0 )
- while (q > 0 && q < sc.Length)
- {
- if (sc[q] == 0xFFFF) // stop at compressed branch
- {
- break;
- }
- if (sc[q] == 0)
- {
- values = GetValues(eq[q]);
- int j = index;
- for (int k = 0; k < values.Length; k++)
- {
- if (j < il.Length && values[k] > il[j])
- {
- il[j] = values[k];
- }
- j++;
- }
- break;
- }
- else
- {
- q = lo[q];
-
- /// <summary>
- /// actually the code should be: q = sc[q] < 0 ? hi[q] : lo[q]; but
- /// java chars are unsigned
- /// </summary>
- }
- }
- }
- else
- {
- p = d < 0 ? lo[p] : hi[p];
- }
- }
- }
-
- /// <summary>
- /// Hyphenate word and return a Hyphenation object.
- /// </summary>
- /// <param name="word"> the word to be hyphenated </param>
- /// <param name="remainCharCount"> Minimum number of characters allowed before the
- /// hyphenation point. </param>
- /// <param name="pushCharCount"> Minimum number of characters allowed after the
- /// hyphenation point. </param>
- /// <returns> a <seealso cref="Hyphenation Hyphenation"/> object representing the
- /// hyphenated word or null if word is not hyphenated. </returns>
- public virtual Hyphenation Hyphenate(string word, int remainCharCount, int pushCharCount)
- {
- char[] w = word.ToCharArray();
- return Hyphenate(w, 0, w.Length, remainCharCount, pushCharCount);
- }
-
- /// <summary>
- /// w = "****nnllllllnnn*****", where n is a non-letter, l is a letter, all n
- /// may be absent, the first n is at offset, the first l is at offset +
- /// iIgnoreAtBeginning; word = ".llllll.'\0'***", where all l in w are copied
- /// into word. In the first part of the routine len = w.length, in the second
- /// part of the routine len = word.length. Three indices are used: index(w),
- /// the index in w, index(word), the index in word, letterindex(word), the
- /// index in the letter part of word. The following relations exist: index(w) =
- /// offset + i - 1 index(word) = i - iIgnoreAtBeginning letterindex(word) =
- /// index(word) - 1 (see first loop). It follows that: index(w) - index(word) =
- /// offset - 1 + iIgnoreAtBeginning index(w) = letterindex(word) + offset +
- /// iIgnoreAtBeginning
- /// </summary>
-
- /// <summary>
- /// Hyphenate word and return an array of hyphenation points.
- /// </summary>
- /// <param name="w"> char array that contains the word </param>
- /// <param name="offset"> Offset to first character in word </param>
- /// <param name="len"> Length of word </param>
- /// <param name="remainCharCount"> Minimum number of characters allowed before the
- /// hyphenation point. </param>
- /// <param name="pushCharCount"> Minimum number of characters allowed after the
- /// hyphenation point. </param>
- /// <returns> a <seealso cref="Hyphenation Hyphenation"/> object representing the
- /// hyphenated word or null if word is not hyphenated. </returns>
- public virtual Hyphenation Hyphenate(char[] w, int offset, int len, int remainCharCount, int pushCharCount)
- {
- int i;
- char[] word = new char[len + 3];
-
- // normalize word
- char[] c = new char[2];
- int iIgnoreAtBeginning = 0;
- int iLength = len;
- bool bEndOfLetters = false;
- for (i = 1; i <= len; i++)
- {
- c[0] = w[offset + i - 1];
- int nc = classmap.Find(c, 0);
- if (nc < 0) // found a non-letter character ...
- {
- if (i == (1 + iIgnoreAtBeginning))
- {
- // ... before any letter character
- iIgnoreAtBeginning++;
- }
- else
- {
- // ... after a letter character
- bEndOfLetters = true;
- }
- iLength--;
- }
- else
- {
- if (!bEndOfLetters)
- {
- word[i - iIgnoreAtBeginning] = (char)nc;
- }
- else
- {
- return null;
- }
- }
- }
- len = iLength;
- if (len < (remainCharCount + pushCharCount))
- {
- // word is too short to be hyphenated
- return null;
- }
- int[] result = new int[len + 1];
- int k = 0;
-
- // check exception list first
- string sw = new string(word, 1, len);
- if (stoplist.ContainsKey(sw))
- {
- // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no =
- // null)
- IList<object> hw = stoplist[sw];
- int j = 0;
- for (i = 0; i < hw.Count; i++)
- {
- object o = hw[i];
- // j = index(sw) = letterindex(word)?
- // result[k] = corresponding index(w)
- if (o is string)
- {
- j += ((string)o).Length;
- if (j >= remainCharCount && j < (len - pushCharCount))
- {
- result[k++] = j + iIgnoreAtBeginning;
- }
- }
- }
- }
- else
- {
- // use algorithm to get hyphenation points
- word[0] = '.'; // word start marker
- word[len + 1] = '.'; // word end marker
- word[len + 2] = (char)0; // null terminated
- sbyte[] il = new sbyte[len + 3]; // initialized to zero
- for (i = 0; i < len + 1; i++)
- {
- SearchPatterns(word, i, il);
- }
-
- // hyphenation points are located where interletter value is odd
- // i is letterindex(word),
- // i + 1 is index(word),
- // result[k] = corresponding index(w)
- for (i = 0; i < len; i++)
- {
- if (((il[i + 1] & 1) == 1) && i >= remainCharCount && i <= (len - pushCharCount))
- {
- result[k++] = i + iIgnoreAtBeginning;
- }
- }
- }
-
- if (k > 0)
- {
- // trim result array
- int[] res = new int[k + 2];
- Array.Copy(result, 0, res, 1, k);
- // We add the synthetical hyphenation points
- // at the beginning and end of the word
- res[0] = 0;
- res[k + 1] = len;
- return new Hyphenation(res);
- }
- else
- {
- return null;
- }
- }
-
- /// <summary>
- /// Add a character class to the tree. It is used by
- /// <seealso cref="PatternParser PatternParser"/> as callback to add character classes.
- /// Character classes define the valid word characters for hyphenation. If a
- /// word contains a character not defined in any of the classes, it is not
- /// hyphenated. It also defines a way to normalize the characters in order to
- /// compare them with the stored patterns. Usually pattern files use only lower
- /// case characters, in this case a class for letter 'a', for example, should
- /// be defined as "aA", the first character being the normalization char.
- /// </summary>
- public virtual void AddClass(string chargroup)
- {
- if (chargroup.Length > 0)
- {
- char equivChar = chargroup[0];
- char[] key = new char[2];
- key[1] = (char)0;
- for (int i = 0; i < chargroup.Length; i++)
- {
- key[0] = chargroup[i];
- classmap.Insert(key, 0, equivChar);
- }
- }
- }
-
- /// <summary>
- /// Add an exception to the tree. It is used by
- /// <seealso cref="PatternParser PatternParser"/> class as callback to store the
- /// hyphenation exceptions.
- /// </summary>
- /// <param name="word"> normalized word </param>
- /// <param name="hyphenatedword"> a vector of alternating strings and
- /// <seealso cref="Hyphen hyphen"/> objects. </param>
- public virtual void AddException(string word, List<object> hyphenatedword)
- {
- stoplist[word] = hyphenatedword;
- }
-
- /// <summary>
- /// Add a pattern to the tree. Mainly, to be used by
- /// <seealso cref="PatternParser PatternParser"/> class as callback to add a pattern to
- /// the tree.
- /// </summary>
- /// <param name="pattern"> the hyphenation pattern </param>
- /// <param name="ivalue"> interletter weight values indicating the desirability and
- /// priority of hyphenating at a given point within the pattern. It
- /// should contain only digit characters. (i.e. '0' to '9'). </param>
- public virtual void AddPattern(string pattern, string ivalue)
- {
- int k = ivalues.Find(ivalue);
- if (k <= 0)
- {
- k = PackValues(ivalue);
- ivalues.Insert(ivalue, (char)k);
- }
- Insert(pattern, (char)k);
- }
-
- // public override void printStats(PrintStream @out)
- // {
- //@out.println("Value space size = " + Convert.ToString(vspace.length()));
- //base.printStats(@out);
-
- // }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
deleted file mode 100644
index 069badd..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
+++ /dev/null
@@ -1,54 +0,0 @@
-\ufeffusing System.Collections.Generic;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Connects the XML pattern file parser to the hyphenation tree: the parser reports
- /// the character classes, exceptions and patterns it reads through these methods.
- ///
- /// This interface has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/) and slightly modified.
- /// </summary>
- public interface IPatternConsumer
- {
-
- /// <summary>
- /// Adds a character class. A character class defines characters that are
- /// considered equivalent for the purpose of hyphenation (e.g. "aA"). It
- /// usually means that case is ignored.
- /// </summary>
- /// <param name="chargroup"> character group </param>
- void AddClass(string chargroup);
-
- /// <summary>
- /// Adds a hyphenation exception. An exception replaces the result obtained by
- /// the algorithm for cases in which it fails or the user wants to supply
- /// their own hyphenation.
- /// </summary>
- /// <param name="word"> the word the exception applies to </param><param name="hyphenatedword"> a list of alternating strings and <see cref="Hyphen"/> instances </param>
- void AddException(string word, List<object> hyphenatedword);
-
- /// <summary>
- /// Adds a hyphenation pattern.
- /// </summary>
- /// <param name="pattern"> the pattern </param>
- /// <param name="values"> interletter values expressed as a string of digit characters </param>
- void AddPattern(string pattern, string values);
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
deleted file mode 100644
index 8c00d19..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
+++ /dev/null
@@ -1,483 +0,0 @@
-\ufeffusing System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Text;
-using System.Xml;
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// An XmlReader-based document handler that reads and parses hyphenation patterns from an XML
- /// file.
- ///
- /// LUCENENET: This class has been refactored from its Java counterpart to use XmlReader rather
- /// than a SAX parser.
- /// </summary>
- public class PatternParser
- {
- internal int currElement; // ELEM_* code of the section being parsed; EndElement resets it to 0 outside a hyphen element
-
- internal IPatternConsumer consumer; // receives the classes, exceptions and patterns found while parsing
-
- internal StringBuilder token; // accumulates the characters of the token currently being read
-
- internal List<object> exception; // parts (strings and Hyphen objects) of the exception being assembled
-
- internal char hyphenChar; // hyphen marker inside exception words; '-' unless a hyphen-char element overrides it
-
- internal string errMsg; // not assigned or read by any member of this class
-
- internal const int ELEM_CLASSES = 1; // currElement value set when a "classes" element starts
-
- internal const int ELEM_EXCEPTIONS = 2; // currElement value set when an "exceptions" element starts
-
- internal const int ELEM_PATTERNS = 3; // currElement value set when a "patterns" element starts
-
- internal const int ELEM_HYPHEN = 4; // currElement value set when a "hyphen" element starts
-
-        /// <summary>
-        /// Creates a parser with an empty token buffer and '-' as the hyphen character.
-        /// No consumer is set; one can be supplied through <see cref="Consumer"/>.
-        /// </summary>
-        public PatternParser()
-        {
-            this.hyphenChar = '-'; // default
-            this.token = new StringBuilder();
-        }
-
-        /// <summary>
-        /// Creates a parser that reports what it reads to the given consumer.
-        /// </summary>
-        /// <param name="consumer"> the consumer to report to </param>
-        public PatternParser(IPatternConsumer consumer)
-            : this()
-        {
-            this.consumer = consumer;
-        }
-
-        /// <summary>
-        /// Sets the consumer that receives the classes, exceptions and patterns
-        /// found while parsing. The property is write-only.
-        /// </summary>
-        public virtual IPatternConsumer Consumer
-        {
-            set { consumer = value; }
-        }
-
-        /// <summary>
-        /// Parses the hyphenation pattern file with the given name. DTD processing is
-        /// enabled and entities are resolved through a <see cref="DtdResolver"/>.
-        /// </summary>
-        /// <param name="filename"> the filename </param>
-        /// <remarks> Exceptions raised while opening or parsing the file are not caught here. </remarks>
-        public virtual void Parse(string filename)
-        {
-            // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
-            var settings = new XmlReaderSettings();
-            settings.DtdProcessing = DtdProcessing.Parse;
-            settings.XmlResolver = new DtdResolver();
-
-            using (XmlReader reader = XmlReader.Create(filename, settings))
-            {
-                Parse(reader);
-            }
-        }
-
- /// <summary>
- /// Parses a hyphenation pattern file, reading it with the UTF-8 encoding.
- /// </summary>
- /// <param name="file"> the pattern file </param>
- public virtual void Parse(FileInfo file)
- {
- Parse(file, Encoding.UTF8);
- }
-
-        /// <summary>
-        /// Parses a hyphenation pattern file, decoding it with the given encoding.
-        /// DTD processing is enabled and entities are resolved through a <see cref="DtdResolver"/>.
-        /// </summary>
-        /// <param name="file"> the pattern file </param>
-        /// <param name="encoding"> the character encoding used to read the file </param>
-        public virtual void Parse(FileInfo file, Encoding encoding)
-        {
-            var settings = new XmlReaderSettings
-            {
-                DtdProcessing = DtdProcessing.Parse,
-                XmlResolver = new DtdResolver()
-            };
-
-            // XmlReaderSettings.CloseInput defaults to false, so disposing the XmlReader does not
-            // close the StreamReader underneath it. Dispose the StreamReader explicitly so the
-            // file handle is released as soon as parsing ends, including when parsing throws.
-            using (var fileReader = new StreamReader(file.FullName, encoding))
-            using (var src = XmlReader.Create(fileReader, settings))
-            {
-                Parse(src);
-            }
-        }
-
-        /// <summary>
-        /// Parses hyphenation patterns from an XML stream. DTD processing is enabled
-        /// and entities are resolved through a <see cref="DtdResolver"/>.
-        /// </summary>
-        /// <param name="xmlStream"> the stream holding the pattern XML </param>
-        public virtual void Parse(Stream xmlStream)
-        {
-            var readerSettings = new XmlReaderSettings();
-            readerSettings.DtdProcessing = DtdProcessing.Parse;
-            readerSettings.XmlResolver = new DtdResolver();
-
-            using (XmlReader xml = XmlReader.Create(xmlStream, readerSettings))
-            {
-                Parse(xml);
-            }
-        }
-
-        /// <summary>
-        /// Parses hyphenation patterns from an already opened reader: moves to the document
-        /// content, then hands every node read after that position to <c>ParseNode</c>.
-        /// </summary>
-        /// <param name="source"> the reader positioned on the pattern document </param>
-        /// <remarks> Exceptions raised by the reader are not caught here. </remarks>
-        public virtual void Parse(XmlReader source)
-        {
-            source.MoveToContent();
-            for (bool hasNode = source.Read(); hasNode; hasNode = source.Read())
-            {
-                ParseNode(source);
-            }
-        }
-
-        /// <summary>
-        /// Translates the node the reader is positioned on into the matching handler call:
-        /// <see cref="StartElement"/> for elements (followed by <see cref="EndElement"/> when
-        /// the element is empty), <see cref="Characters"/> for text and
-        /// <see cref="EndElement"/> for end tags. Other node types are ignored.
-        /// </summary>
-        /// <param name="node"> the reader, positioned on the node to dispatch </param>
-        private void ParseNode(XmlReader node)
-        {
-            XmlNodeType nodeType = node.NodeType;
-            if (nodeType == XmlNodeType.Element)
-            {
-                string elementUri = node.NamespaceURI;
-                string elementName = node.Name;
-                // Read this before GetAttributes moves the reader onto the attributes.
-                bool selfClosing = node.IsEmptyElement;
-                IDictionary<string, string> attributes = GetAttributes(node);
-
-                // The raw name is not used by the handlers, so an empty string is passed.
-                StartElement(elementUri, elementName, string.Empty, attributes);
-                if (selfClosing)
-                {
-                    EndElement(elementUri, elementName, string.Empty);
-                }
-            }
-            else if (nodeType == XmlNodeType.Text)
-            {
-                Characters(node.Value.ToCharArray(), 0, node.Value.Length);
-            }
-            else if (nodeType == XmlNodeType.EndElement)
-            {
-                string elementUri = node.NamespaceURI;
-                string elementName = node.Name;
-                EndElement(elementUri, elementName, string.Empty);
-            }
-        }
-
-        /// <summary>
-        /// Collects the attributes of the current node into a name-to-value dictionary.
-        /// The reader is left positioned on the last attribute it visited.
-        /// </summary>
-        /// <param name="node"> the reader, positioned on the node whose attributes are wanted </param>
-        /// <returns> the attributes keyed by name; empty when the node has none </returns>
-        private IDictionary<string, string> GetAttributes(XmlReader node)
-        {
-            var attributes = new Dictionary<string, string>();
-            if (!node.HasAttributes)
-            {
-                return attributes;
-            }
-
-            int index = 0;
-            while (index < node.AttributeCount)
-            {
-                node.MoveToAttribute(index);
-                attributes.Add(node.Name, node.Value);
-                index++;
-            }
-
-            return attributes;
-        }
-
-        /// <summary>
-        /// Extracts the next whitespace-delimited word from <paramref name="chars"/>, removing
-        /// what it consumes from the buffer. Characters of a word that is not yet terminated by
-        /// whitespace are kept in the <c>token</c> field, so a word may be completed by a later call.
-        /// </summary>
-        /// <param name="chars"> the buffer to read from; consumed characters are deleted from it </param>
-        /// <returns> the completed word, or <c>null</c> when the buffer ran out before a
-        /// terminating whitespace character was seen </returns>
-        protected internal virtual string ReadToken(StringBuilder chars)
-        {
-            // Measure the run of leading whitespace.
-            int leading = 0;
-            while (leading < chars.Length && char.IsWhiteSpace(chars[leading]))
-            {
-                leading++;
-            }
-
-            if (leading > 0)
-            {
-                chars.Remove(0, leading);
-                // Leading whitespace terminates a token carried over from an earlier call.
-                if (token.Length > 0)
-                {
-                    string pending = token.ToString();
-                    token.Length = 0;
-                    return pending;
-                }
-            }
-
-            // Measure the run of non-whitespace characters that follows.
-            int end = 0;
-            while (end < chars.Length && !char.IsWhiteSpace(chars[end]))
-            {
-                end++;
-            }
-            bool terminated = end < chars.Length;
-
-            token.Append(chars.ToString(0, end));
-            chars.Remove(0, end);
-
-            if (terminated)
-            {
-                string word = token.ToString();
-                token.Length = 0;
-                return word;
-            }
-
-            token.Append(chars.ToString());
-            return null;
-        }
-
-        /// <summary>
-        /// Returns <paramref name="word"/> with every digit character removed.
-        /// </summary>
-        /// <param name="word"> a pattern word, possibly containing digits </param>
-        /// <returns> the non-digit characters of the word, in their original order </returns>
-        protected internal static string GetPattern(string word)
-        {
-            var letters = new StringBuilder();
-            foreach (char c in word)
-            {
-                if (char.IsDigit(c))
-                {
-                    continue;
-                }
-                letters.Append(c);
-            }
-            return letters.ToString();
-        }
-
-        /// <summary>
-        /// Builds a new list from <paramref name="ex"/> in which every string item is split at
-        /// each occurrence of the hyphen character: the text before it is added as a string
-        /// (even when empty), followed by a <see cref="Hyphen"/> created from the hyphen
-        /// character. Items that are not strings are copied unchanged.
-        /// </summary>
-        /// <param name="ex"> the exception parts to normalize </param>
-        /// <returns> the normalized list of strings and hyphen objects </returns>
-        protected internal virtual List<object> NormalizeException<T1>(List<T1> ex)
-        {
-            var normalized = new List<object>();
-            foreach (T1 element in ex)
-            {
-                object item = element;
-                string text = item as string;
-                if (text == null)
-                {
-                    normalized.Add(item);
-                    continue;
-                }
-
-                var segment = new StringBuilder();
-                foreach (char c in text)
-                {
-                    if (c == hyphenChar)
-                    {
-                        normalized.Add(segment.ToString());
-                        segment.Length = 0;
-                        // we use here hyphenChar which is not necessarily
-                        // the one to be printed
-                        normalized.Add(new Hyphen(new string(new[] { hyphenChar }), null, null));
-                    }
-                    else
-                    {
-                        segment.Append(c);
-                    }
-                }
-
-                if (segment.Length > 0)
-                {
-                    normalized.Add(segment.ToString());
-                }
-            }
-            return normalized;
-        }
-
-        /// <summary>
-        /// Concatenates the parts of an exception into a single word: string items are appended
-        /// as they are; every other item is cast to <see cref="Hyphen"/> and contributes its
-        /// <c>noBreak</c> value when that value is not null.
-        /// </summary>
-        /// <param name="ex"> the exception parts (strings and hyphen objects) </param>
-        /// <returns> the concatenated word </returns>
-        protected internal virtual string GetExceptionWord<T1>(List<T1> ex)
-        {
-            var word = new StringBuilder();
-            foreach (T1 element in ex)
-            {
-                object item = element;
-                if (!(item is string))
-                {
-                    if (((Hyphen)item).noBreak != null)
-                    {
-                        word.Append(((Hyphen)item).noBreak);
-                    }
-                    continue;
-                }
-
-                word.Append((string)item);
-            }
-            return word.ToString();
-        }
-
-        /// <summary>
-        /// Derives the interletter values of a pattern. The pattern, with a dummy letter appended,
-        /// is scanned from the left: a digit is copied to the result and the character after it is
-        /// skipped; any other character contributes '0'.
-        /// </summary>
-        /// <param name="pat"> the pattern, possibly containing digits </param>
-        /// <returns> the interletter values as a string of characters </returns>
-        protected internal static string GetInterletterValues(string pat)
-        {
-            string padded = pat + "a"; // add dummy letter to serve as sentinel
-            var values = new StringBuilder();
-            int pos = 0;
-            while (pos < padded.Length)
-            {
-                char c = padded[pos];
-                if (char.IsDigit(c))
-                {
-                    values.Append(c);
-                    pos += 2; // also step over the character that follows the digit
-                }
-                else
-                {
-                    values.Append('0');
-                    pos += 1;
-                }
-            }
-            return values.ToString();
-        }
-
-        /// <summary>
-        /// LUCENENET specific resolver that serves <c>hyphenation.dtd</c> from an embedded
-        /// resource instead of the file system. A URI whose last segment equals that file name
-        /// (ignoring case) is answered with the manifest resource named after this type's
-        /// namespace plus the file name; every other URI is passed to the base resolver.
-        /// </summary>
-        internal class DtdResolver : XmlUrlResolver
-        {
-            public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
-            {
-                const string DtdFileName = "hyphenation.dtd";
-
-                string lastSegment = absoluteUri.Segments.LastOrDefault();
-                if (!DtdFileName.Equals(lastSegment, StringComparison.OrdinalIgnoreCase))
-                {
-                    return base.GetEntity(absoluteUri, role, ofObjectToReturn);
-                }
-
-                Type resolverType = GetType();
-                string resourceName = string.Concat(resolverType.Namespace, ".", DtdFileName);
-                return resolverType.Assembly.GetManifestResourceStream(resourceName);
-            }
-        }
-
- //
- // ContentHandler methods
- //
-
-        /// <summary>
-        /// Handles the start of an element. A <c>hyphen-char</c> element overrides the hyphen
-        /// character when its <c>value</c> attribute is exactly one character long;
-        /// <c>classes</c>, <c>patterns</c> and <c>exceptions</c> select the section being parsed;
-        /// a <c>hyphen</c> element appends the pending token (if any) and a <see cref="Hyphen"/>
-        /// to the exception being assembled. The token buffer is cleared in every case.
-        /// </summary>
-        /// <param name="uri"> namespace URI of the element (not used) </param>
-        /// <param name="local"> name of the element </param>
-        /// <param name="raw"> raw name of the element (not used) </param>
-        /// <param name="attrs"> attributes of the element, keyed by name </param>
-        public void StartElement(string uri, string local, string raw, IDictionary<string, string> attrs)
-        {
-            if (local.Equals("hyphen-char"))
-            {
-                string h = GetAttributeOrNull(attrs, "value");
-                if (h != null && h.Length == 1)
-                {
-                    hyphenChar = h[0];
-                }
-            }
-            else if (local.Equals("classes"))
-            {
-                currElement = ELEM_CLASSES;
-            }
-            else if (local.Equals("patterns"))
-            {
-                currElement = ELEM_PATTERNS;
-            }
-            else if (local.Equals("exceptions"))
-            {
-                currElement = ELEM_EXCEPTIONS;
-                exception = new List<object>();
-            }
-            else if (local.Equals("hyphen"))
-            {
-                if (token.Length > 0)
-                {
-                    exception.Add(token.ToString());
-                }
-                // The indexer throws KeyNotFoundException for an attribute that is not present,
-                // so a hyphen element that omits pre, no or post would abort parsing.
-                // Absent attributes are passed on as null instead.
-                exception.Add(new Hyphen(
-                    GetAttributeOrNull(attrs, "pre"),
-                    GetAttributeOrNull(attrs, "no"),
-                    GetAttributeOrNull(attrs, "post")));
-                currElement = ELEM_HYPHEN;
-            }
-            token.Length = 0;
-        }
-
-        /// <summary>
-        /// Returns the value stored under <paramref name="name"/>, or <c>null</c> when absent.
-        /// </summary>
-        private static string GetAttributeOrNull(IDictionary<string, string> attrs, string name)
-        {
-            string value;
-            return attrs.TryGetValue(name, out value) ? value : null;
-        }
-
-        /// <summary>
-        /// Handles the end of an element. Outside a <c>hyphen</c> element, a non-empty pending
-        /// token is reported to the consumer according to the current section and then cleared.
-        /// Afterwards the current section becomes the exceptions section when a <c>hyphen</c>
-        /// element was being closed, and 0 otherwise.
-        /// </summary>
-        /// <param name="uri"> namespace URI of the element (not used) </param>
-        /// <param name="local"> name of the element (not used) </param>
-        /// <param name="raw"> raw name of the element (not used) </param>
-        public void EndElement(string uri, string local, string raw)
-        {
-            bool closingHyphen = currElement == ELEM_HYPHEN;
-
-            // Inside a hyphen element the token is left untouched.
-            if (!closingHyphen && token.Length > 0)
-            {
-                string word = token.ToString();
-                if (currElement == ELEM_CLASSES)
-                {
-                    consumer.AddClass(word);
-                }
-                else if (currElement == ELEM_EXCEPTIONS)
-                {
-                    exception.Add(word);
-                    exception = NormalizeException(exception);
-                    consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
-                }
-                else if (currElement == ELEM_PATTERNS)
-                {
-                    consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
-                }
-                token.Length = 0;
-            }
-
-            currElement = closingHyphen ? ELEM_EXCEPTIONS : 0;
-        }
-
-        /// <summary>
-        /// Handles character data: copies the given range into a buffer, then reports every word
-        /// that <see cref="ReadToken"/> extracts from it to the consumer according to the current
-        /// section. In the exceptions section the assembled exception is cleared after each word.
-        /// </summary>
-        /// <param name="ch"> the characters </param>
-        /// <param name="start"> index of the first character to use </param>
-        /// <param name="length"> number of characters to use </param>
-        public void Characters(char[] ch, int start, int length)
-        {
-            var buffer = new StringBuilder(length);
-            buffer.Append(ch, start, length);
-
-            for (string word = ReadToken(buffer); word != null; word = ReadToken(buffer))
-            {
-                if (currElement == ELEM_CLASSES)
-                {
-                    consumer.AddClass(word);
-                }
-                else if (currElement == ELEM_EXCEPTIONS)
-                {
-                    exception.Add(word);
-                    exception = NormalizeException(exception);
-                    consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
-                    exception.Clear();
-                }
-                else if (currElement == ELEM_PATTERNS)
-                {
-                    consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
-                }
-            }
-        }
- }
-}
\ No newline at end of file