You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2016/08/23 23:18:31 UTC
[38/50] [abbrv] lucenenet git commit: Ported Analysis.Compound
namespace + tests
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs
index f17d37c..98f1e47 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs
@@ -1,779 +1,811 @@
-\ufeff/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-using System;
+\ufeffusing System;
+using System.Collections;
using System.Collections.Generic;
using System.Text;
namespace Lucene.Net.Analysis.Compound.Hyphenation
{
-
-
- /// <summary>
- /// <h2>Ternary Search Tree.</h2>
- ///
- /// <para>
- /// A ternary search tree is a hybrid between a binary tree and a digital search
- /// tree (trie). Keys are limited to strings. A data value of type char is stored
- /// in each leaf node. It can be used as an index (or pointer) to the data.
- /// Branches that only contain one key are compressed to one node by storing a
- /// pointer to the trailer substring of the key. This class is intended to serve
- /// as base class or helper class to implement Dictionary collections or the
- /// like. Ternary trees have some nice properties as the following: the tree can
- /// be traversed in sorted order, partial matches (wildcard) can be implemented,
- /// retrieval of all keys within a given distance from the target, etc. The
- /// storage requirements are higher than a binary tree but a lot less than a
- /// trie. Performance is comparable with a hash table, sometimes it outperforms a
- /// hash function (most of the time can determine a miss faster than a hash).
- /// </para>
- ///
- /// <para>
- /// The main purpose of this java port is to serve as a base for implementing
- /// TeX's hyphenation algorithm (see The TeXBook, appendix H). Each language
- /// requires from 5000 to 15000 hyphenation patterns which will be keys in this
- /// tree. The strings patterns are usually small (from 2 to 5 characters), but
- /// each char in the tree is stored in a node. Thus memory usage is the main
- /// concern. We will sacrifice 'elegance' to keep memory requirements to the
- /// minimum. Using java's char type as pointer (yes, I know pointer it is a
- /// forbidden word in java) we can keep the size of the node to be just 8 bytes
- /// (3 pointers and the data char). This gives room for about 65000 nodes. In my
- /// tests the english patterns took 7694 nodes and the german patterns 10055
- /// nodes, so I think we are safe.
- /// </para>
- ///
- /// <para>
- /// All said, this is a map with strings as keys and char as value. Pretty
- /// limited!. It can be extended to a general map by using the string
- /// representation of an object and using the char value as an index to an array
- /// that contains the object values.
- /// </para>
- ///
- /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
- /// </summary>
-
- public class TernaryTree : ICloneable
- {
-
- /// <summary>
- /// We use 4 arrays to represent a node. I guess I should have created a proper
- /// node class, but somehow Knuth's pascal code made me forget we now have a
- /// portable language with virtual memory management and automatic garbage
- /// collection! And now is kind of late, furthermore, if it ain't broken, don't
- /// fix it.
- /// </summary>
-
- /// <summary>
- /// Pointer to low branch and to rest of the key when it is stored directly in
- /// this node, we don't have unions in java!
- /// </summary>
- protected internal char[] lo;
-
- /// <summary>
- /// Pointer to high branch.
- /// </summary>
- protected internal char[] hi;
-
- /// <summary>
- /// Pointer to equal branch and to data when this node is a string terminator.
- /// </summary>
- protected internal char[] eq;
-
- /// <summary>
- /// <P>
- /// The character stored in this node: splitchar. Two special values are
- /// reserved:
- /// </P>
- /// <ul>
- /// <li>0x0000 as string terminator</li>
- /// <li>0xFFFF to indicate that the branch starting at this node is compressed</li>
- /// </ul>
- /// <para>
- /// This shouldn't be a problem if we give the usual semantics to strings since
- /// 0xFFFF is guaranteed not to be an Unicode character.
- /// </para>
- /// </summary>
- protected internal char[] sc;
-
- /// <summary>
- /// This vector holds the trailing of the keys when the branch is compressed.
- /// </summary>
- protected internal CharVector kv;
-
- protected internal char root;
-
- protected internal char freenode;
-
- protected internal int length; // number of items in tree
-
- protected internal const int BLOCK_SIZE = 2048; // allocation size for arrays
-
- internal TernaryTree()
- {
- init();
- }
-
- protected internal virtual void init()
- {
- root = (char)0;
- freenode = (char)1;
- length = 0;
- lo = new char[BLOCK_SIZE];
- hi = new char[BLOCK_SIZE];
- eq = new char[BLOCK_SIZE];
- sc = new char[BLOCK_SIZE];
- kv = new CharVector();
- }
-
- /// <summary>
- /// Branches are initially compressed, needing one node per key plus the size
- /// of the string key. They are decompressed as needed when another key with
- /// same prefix is inserted. This saves a lot of space, specially for long
- /// keys.
- /// </summary>
- public virtual void insert(string key, char val)
- {
- // make sure we have enough room in the arrays
- int len = key.Length + 1; // maximum number of nodes that may be generated
- if (freenode + len > eq.Length)
- {
- redimNodeArrays(eq.Length + BLOCK_SIZE);
- }
- char[] strkey = new char[len--];
- key.CopyTo(0, strkey, 0, len - 0);
- strkey[len] = (char)0;
- root = insert(root, strkey, 0, val);
- }
-
- public virtual void insert(char[] key, int start, char val)
- {
- int len = strlen(key) + 1;
- if (freenode + len > eq.Length)
- {
- redimNodeArrays(eq.Length + BLOCK_SIZE);
- }
- root = insert(root, key, start, val);
- }
-
- /// <summary>
- /// The actual insertion function, recursive version.
- /// </summary>
- private char insert(char p, char[] key, int start, char val)
- {
- int len = strlen(key, start);
- if (p == 0)
- {
- // this means there is no branch, this node will start a new branch.
- // Instead of doing that, we store the key somewhere else and create
- // only one node with a pointer to the key
- p = freenode++;
- eq[p] = val; // holds data
- length++;
- hi[p] = (char)0;
- if (len > 0)
- {
- sc[p] = (char)0xFFFF; // indicates branch is compressed
- lo[p] = (char) kv.alloc(len + 1); // use 'lo' to hold pointer to key
- strcpy(kv.Array, lo[p], key, start);
- }
- else
- {
- sc[p] = (char)0;
- lo[p] = (char)0;
- }
- return p;
- }
-
- if (sc[p] == 0xFFFF)
- {
- // branch is compressed: need to decompress
- // this will generate garbage in the external key array
- // but we can do some garbage collection later
- char pp = freenode++;
- lo[pp] = lo[p]; // previous pointer to key
- eq[pp] = eq[p]; // previous pointer to data
- lo[p] = (char)0;
- if (len > 0)
- {
- sc[p] = kv.get(lo[pp]);
- eq[p] = pp;
- lo[pp]++;
- if (kv.get(lo[pp]) == 0)
- {
- // key completly decompressed leaving garbage in key array
- lo[pp] = (char)0;
- sc[pp] = (char)0;
- hi[pp] = (char)0;
- }
- else
- {
- // we only got first char of key, rest is still there
- sc[pp] = (char)0xFFFF;
- }
- }
- else
- {
- // In this case we can save a node by swapping the new node
- // with the compressed node
- sc[pp] = (char)0xFFFF;
- hi[p] = pp;
- sc[p] = (char)0;
- eq[p] = val;
- length++;
- return p;
- }
- }
- char s = key[start];
- if (s < sc[p])
- {
- lo[p] = insert(lo[p], key, start, val);
- }
- else if (s == sc[p])
- {
- if (s != 0)
- {
- eq[p] = insert(eq[p], key, start + 1, val);
- }
- else
- {
- // key already in tree, overwrite data
- eq[p] = val;
- }
- }
- else
- {
- hi[p] = insert(hi[p], key, start, val);
- }
- return p;
- }
-
- /// <summary>
- /// Compares 2 null terminated char arrays
- /// </summary>
- public static int strcmp(char[] a, int startA, char[] b, int startB)
- {
- for (; a[startA] == b[startB]; startA++, startB++)
- {
- if (a[startA] == 0)
- {
- return 0;
- }
- }
- return a[startA] - b[startB];
- }
-
- /// <summary>
- /// Compares a string with null terminated char array
- /// </summary>
- public static int strcmp(string str, char[] a, int start)
- {
- int i , d , len = str.Length;
- for (i = 0; i < len; i++)
- {
- d = (int) str[i] - a[start + i];
- if (d != 0)
- {
- return d;
- }
- if (a[start + i] == 0)
- {
- return d;
- }
- }
- if (a[start + i] != 0)
- {
- return -a[start + i];
- }
- return 0;
-
- }
-
- public static void strcpy(char[] dst, int di, char[] src, int si)
- {
- while (src[si] != 0)
- {
- dst[di++] = src[si++];
- }
- dst[di] = (char)0;
- }
-
- public static int strlen(char[] a, int start)
- {
- int len = 0;
- for (int i = start; i < a.Length && a[i] != 0; i++)
- {
- len++;
- }
- return len;
- }
-
- public static int strlen(char[] a)
- {
- return strlen(a, 0);
- }
-
- public virtual int find(string key)
- {
- int len = key.Length;
- char[] strkey = new char[len + 1];
- key.CopyTo(0, strkey, 0, len - 0);
- strkey[len] = (char)0;
-
- return find(strkey, 0);
- }
-
- public virtual int find(char[] key, int start)
- {
- int d;
- char p = root;
- int i = start;
- char c;
-
- while (p != 0)
- {
- if (sc[p] == 0xFFFF)
- {
- if (strcmp(key, i, kv.Array, lo[p]) == 0)
- {
- return eq[p];
- }
- else
- {
- return -1;
- }
- }
- c = key[i];
- d = c - sc[p];
- if (d == 0)
- {
- if (c == 0)
- {
- return eq[p];
- }
- i++;
- p = eq[p];
- }
- else if (d < 0)
- {
- p = lo[p];
- }
- else
- {
- p = hi[p];
- }
- }
- return -1;
- }
-
- public virtual bool knows(string key)
- {
- return (find(key) >= 0);
- }
-
- // redimension the arrays
- private void redimNodeArrays(int newsize)
- {
- int len = newsize < lo.Length ? newsize : lo.Length;
- char[] na = new char[newsize];
- Array.Copy(lo, 0, na, 0, len);
- lo = na;
- na = new char[newsize];
- Array.Copy(hi, 0, na, 0, len);
- hi = na;
- na = new char[newsize];
- Array.Copy(eq, 0, na, 0, len);
- eq = na;
- na = new char[newsize];
- Array.Copy(sc, 0, na, 0, len);
- sc = na;
- }
-
- public virtual int size()
- {
- return length;
- }
-
- public override TernaryTree clone()
- {
- TernaryTree t = new TernaryTree();
- t.lo = this.lo.Clone();
- t.hi = this.hi.Clone();
- t.eq = this.eq.Clone();
- t.sc = this.sc.Clone();
- t.kv = this.kv.clone();
- t.root = this.root;
- t.freenode = this.freenode;
- t.length = this.length;
-
- return t;
- }
-
- /// <summary>
- /// Recursively insert the median first and then the median of the lower and
- /// upper halves, and so on in order to get a balanced tree. The array of keys
- /// is assumed to be sorted in ascending order.
- /// </summary>
- protected internal virtual void insertBalanced(string[] k, char[] v, int offset, int n)
- {
- int m;
- if (n < 1)
- {
- return;
- }
- m = n >> 1;
-
- insert(k[m + offset], v[m + offset]);
- insertBalanced(k, v, offset, m);
-
- insertBalanced(k, v, offset + m + 1, n - m - 1);
- }
-
- /// <summary>
- /// Balance the tree for best search performance
- /// </summary>
- public virtual void balance()
- {
- // System.out.print("Before root splitchar = ");
- // System.out.println(sc[root]);
-
- int i = 0, n = length;
- string[] k = new string[n];
- char[] v = new char[n];
- Iterator iter = new Iterator(this);
- while (iter.hasMoreElements())
- {
- v[i] = iter.Value;
- k[i++] = iter.nextElement();
- }
- init();
- insertBalanced(k, v, 0, n);
-
- // With uniform letter distribution sc[root] should be around 'm'
- // System.out.print("After root splitchar = ");
- // System.out.println(sc[root]);
- }
-
- /// <summary>
- /// Each node stores a character (splitchar) which is part of some key(s). In a
- /// compressed branch (one that only contain a single string key) the trailer
- /// of the key which is not already in nodes is stored externally in the kv
- /// array. As items are inserted, key substrings decrease. Some substrings may
- /// completely disappear when the whole branch is totally decompressed. The
- /// tree is traversed to find the key substrings actually used. In addition,
- /// duplicate substrings are removed using a map (implemented with a
- /// TernaryTree!).
- ///
- /// </summary>
- public virtual void trimToSize()
- {
- // first balance the tree for best performance
- balance();
-
- // redimension the node arrays
- redimNodeArrays(freenode);
-
- // ok, compact kv array
- CharVector kx = new CharVector();
- kx.alloc(1);
- TernaryTree map = new TernaryTree();
- compact(kx, map, root);
- kv = kx;
- kv.trimToSize();
- }
-
- private void compact(CharVector kx, TernaryTree map, char p)
- {
- int k;
- if (p == 0)
- {
- return;
- }
- if (sc[p] == 0xFFFF)
- {
- k = map.find(kv.Array, lo[p]);
- if (k < 0)
- {
- k = kx.alloc(strlen(kv.Array, lo[p]) + 1);
- strcpy(kx.Array, k, kv.Array, lo[p]);
- map.insert(kx.Array, k, (char) k);
- }
- lo[p] = (char) k;
- }
- else
- {
- compact(kx, map, lo[p]);
- if (sc[p] != 0)
- {
- compact(kx, map, eq[p]);
- }
- compact(kx, map, hi[p]);
- }
- }
-
- public virtual IEnumerator<string> keys()
- {
- return new Iterator(this);
- }
-
- public class Iterator : IEnumerator<string>
- {
- private readonly TernaryTree outerInstance;
-
-
- /// <summary>
- /// current node index
- /// </summary>
- internal int cur;
-
- /// <summary>
- /// current key
- /// </summary>
- internal string curkey;
-
- private class Item : ICloneable
- {
- private readonly TernaryTree.Iterator outerInstance;
-
- internal char parent;
-
- internal char child;
-
- public Item(TernaryTree.Iterator outerInstance)
- {
- this.outerInstance = outerInstance;
- parent = (char)0;
- child = (char)0;
- }
-
- public Item(TernaryTree.Iterator outerInstance, char p, char c)
- {
- this.outerInstance = outerInstance;
- parent = p;
- child = c;
- }
-
- public override Item clone()
- {
- return new Item(outerInstance, parent, child);
- }
-
- }
-
- /// <summary>
- /// Node stack
- /// </summary>
- internal Stack<Item> ns;
-
- /// <summary>
- /// key stack implemented with a StringBuilder
- /// </summary>
- internal StringBuilder ks;
-
- public Iterator(TernaryTree outerInstance)
- {
- this.outerInstance = outerInstance;
- cur = -1;
- ns = new Stack<>();
- ks = new StringBuilder();
- rewind();
- }
-
- public virtual void rewind()
- {
- ns.removeAllElements();
- ks.Length = 0;
- cur = outerInstance.root;
- run();
- }
-
- public override string nextElement()
- {
- string res = curkey;
- cur = up();
- run();
- return res;
- }
-
- public virtual char Value
- {
- get
- {
- if (cur >= 0)
- {
- return outerInstance.eq[cur];
- }
- return 0;
- }
- }
-
- public override bool hasMoreElements()
- {
- return (cur != -1);
- }
-
- /// <summary>
- /// traverse upwards
- /// </summary>
- internal virtual int up()
- {
- Item i = new Item(this);
- int res = 0;
-
- if (ns.Count == 0)
- {
- return -1;
- }
-
- if (cur != 0 && outerInstance.sc[cur] == 0)
- {
- return outerInstance.lo[cur];
- }
-
- bool climb = true;
-
- while (climb)
- {
- i = ns.Pop();
- i.child++;
- switch (i.child)
- {
- case 1:
- if (outerInstance.sc[i.parent] != 0)
- {
- res = outerInstance.eq[i.parent];
- ns.Push(i.clone());
- ks.Append(outerInstance.sc[i.parent]);
- }
- else
- {
- i.child++;
- ns.Push(i.clone());
- res = outerInstance.hi[i.parent];
- }
- climb = false;
- break;
-
- case 2:
- res = outerInstance.hi[i.parent];
- ns.Push(i.clone());
- if (ks.Length > 0)
- {
- ks.Length = ks.Length - 1; // pop
- }
- climb = false;
- break;
-
- default:
- if (ns.Count == 0)
- {
- return -1;
- }
- climb = true;
- break;
- }
- }
- return res;
- }
-
- /// <summary>
- /// traverse the tree to find next key
- /// </summary>
- internal virtual int run()
- {
- if (cur == -1)
- {
- return -1;
- }
-
- bool leaf = false;
- while (true)
- {
- // first go down on low branch until leaf or compressed branch
- while (cur != 0)
- {
- if (outerInstance.sc[cur] == 0xFFFF)
- {
- leaf = true;
- break;
- }
- ns.Push(new Item(this, (char) cur, '\u0000'));
- if (outerInstance.sc[cur] == 0)
- {
- leaf = true;
- break;
- }
- cur = outerInstance.lo[cur];
- }
- if (leaf)
- {
- break;
- }
- // nothing found, go up one node and try again
- cur = up();
- if (cur == -1)
- {
- return -1;
- }
- }
- // The current node should be a data node and
- // the key should be in the key stack (at least partially)
- StringBuilder buf = new StringBuilder(ks.ToString());
- if (outerInstance.sc[cur] == 0xFFFF)
- {
- int p = outerInstance.lo[cur];
- while (outerInstance.kv.get(p) != 0)
- {
- buf.Append(outerInstance.kv.get(p++));
- }
- }
- curkey = buf.ToString();
- return 0;
- }
-
- }
-
- public virtual void printStats(PrintStream @out)
- {
- @out.println("Number of keys = " + Convert.ToString(length));
- @out.println("Node count = " + Convert.ToString(freenode));
- // System.out.println("Array length = " + Integer.toString(eq.length));
- @out.println("Key Array length = " + Convert.ToString(kv.length()));
-
- /*
- * for(int i=0; i<kv.length(); i++) if ( kv.get(i) != 0 )
- * System.out.print(kv.get(i)); else System.out.println("");
- * System.out.println("Keys:"); for(Enumeration enum = keys();
- * enum.hasMoreElements(); ) System.out.println(enum.nextElement());
- */
-
- }
- /*
- public static void main(String[] args) {
- TernaryTree tt = new TernaryTree();
- tt.insert("Carlos", 'C');
- tt.insert("Car", 'r');
- tt.insert("palos", 'l');
- tt.insert("pa", 'p');
- tt.trimToSize();
- System.out.println((char) tt.find("Car"));
- System.out.println((char) tt.find("Carlos"));
- System.out.println((char) tt.find("alto"));
- tt.printStats(System.out);
- }
- */
-
- }
-
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// <h2>Ternary Search Tree.</h2>
+ ///
+ /// <para>
+ /// A ternary search tree is a hybrid between a binary tree and a digital search
+ /// tree (trie). Keys are limited to strings. A data value of type char is stored
+ /// in each leaf node. It can be used as an index (or pointer) to the data.
+ /// Branches that only contain one key are compressed to one node by storing a
+ /// pointer to the trailer substring of the key. This class is intended to serve
+ /// as base class or helper class to implement Dictionary collections or the
+ /// like. Ternary trees have some nice properties as the following: the tree can
+ /// be traversed in sorted order, partial matches (wildcard) can be implemented,
+ /// retrieval of all keys within a given distance from the target, etc. The
+ /// storage requirements are higher than a binary tree but a lot less than a
+ /// trie. Performance is comparable with a hash table, sometimes it outperforms a
+ /// hash function (most of the time can determine a miss faster than a hash).
+ /// </para>
+ ///
+ /// <para>
+ /// The main purpose of this java port is to serve as a base for implementing
+ /// TeX's hyphenation algorithm (see The TeXBook, appendix H). Each language
+ /// requires from 5000 to 15000 hyphenation patterns which will be keys in this
+ /// tree. The strings patterns are usually small (from 2 to 5 characters), but
+ /// each char in the tree is stored in a node. Thus memory usage is the main
+ /// concern. We will sacrifice 'elegance' to keep memory requirements to the
+ /// minimum. Using java's char type as pointer (yes, I know pointer is a
+ /// forbidden word in java) we can keep the size of the node to be just 8 bytes
+ /// (3 pointers and the data char). This gives room for about 65000 nodes. In my
+ /// tests the english patterns took 7694 nodes and the german patterns 10055
+ /// nodes, so I think we are safe.
+ /// </para>
+ ///
+ /// <para>
+ /// All said, this is a map with strings as keys and char as value. Pretty
+ /// limited! It can be extended to a general map by using the string
+ /// representation of an object and using the char value as an index to an array
+ /// that contains the object values.
+ /// </para>
+ ///
+ /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). It has been slightly modified.
+ /// </summary>
+
+ public class TernaryTree : ICloneable
+ {
+ // Implementation note:
+ // We use 4 arrays to represent a node. I guess I should have created a proper
+ // node class, but somehow Knuth's pascal code made me forget we now have a
+ // portable language with virtual memory management and automatic garbage
+ // collection! And now it is kind of late; furthermore, if it ain't broken, don't
+ // fix it.
+ //
+
+ /// <summary>
+ /// Pointer to low branch and to rest of the key when it is stored directly in
+ /// this node, we don't have unions in java!
+ /// </summary>
+ protected internal char[] lo;
+
+ /// <summary>
+ /// Pointer to high branch.
+ /// </summary>
+ protected internal char[] hi;
+
+ /// <summary>
+ /// Pointer to equal branch and to data when this node is a string terminator.
+ /// </summary>
+ protected internal char[] eq;
+
+ /// <summary>
+ /// <para>
+ /// The character stored in this node: splitchar. Two special values are
+ /// reserved:
+ /// </para>
+ /// <list type="bullet">
+ /// <item><description>0x0000 as string terminator</description></item>
+ /// <item><description>0xFFFF to indicate that the branch starting at this node is compressed</description></item>
+ /// </list>
+ /// <para>
+ /// This shouldn't be a problem if we give the usual semantics to strings since
+ /// 0xFFFF is guaranteed not to be a Unicode character.
+ /// </para>
+ /// </summary>
+ protected internal char[] sc;
+
+ /// <summary>
+ /// This vector holds the trailing of the keys when the branch is compressed.
+ /// </summary>
+ protected internal CharVector kv;
+
+ protected internal char root;
+
+ protected internal char freenode;
+
+ protected internal int length; // number of items in tree
+
+ protected internal const int BLOCK_SIZE = 2048; // allocation size for arrays
+
+ internal TernaryTree()
+ {
+ Init();
+ }
+
+ protected internal virtual void Init()
+ {
+ root = (char)0;
+ freenode = (char)1;
+ length = 0;
+ lo = new char[BLOCK_SIZE];
+ hi = new char[BLOCK_SIZE];
+ eq = new char[BLOCK_SIZE];
+ sc = new char[BLOCK_SIZE];
+ kv = new CharVector();
+ }
+
+ /// <summary>
+ /// Branches are initially compressed, needing one node per key plus the size
+ /// of the string key. They are decompressed as needed when another key with
+ /// same prefix is inserted. This saves a lot of space, especially for long
+ /// keys.
+ /// </summary>
+ public virtual void Insert(string key, char val)
+ {
+ // make sure we have enough room in the arrays
+ int len = key.Length + 1; // maximum number of nodes that may be generated
+ if (freenode + len > eq.Length)
+ {
+ RedimNodeArrays(eq.Length + BLOCK_SIZE);
+ }
+ char[] strkey = new char[len--];
+ key.CopyTo(0, strkey, 0, len - 0);
+ strkey[len] = (char)0;
+ root = Insert(root, strkey, 0, val);
+ }
+
+ public virtual void Insert(char[] key, int start, char val)
+ {
+ int len = StrLen(key) + 1;
+ if (freenode + len > eq.Length)
+ {
+ RedimNodeArrays(eq.Length + BLOCK_SIZE);
+ }
+ root = Insert(root, key, start, val);
+ }
+
+ /// <summary>
+ /// The actual insertion function, recursive version.
+ /// </summary>
+ private char Insert(char p, char[] key, int start, char val)
+ {
+ int len = StrLen(key, start);
+ if (p == 0)
+ {
+ // this means there is no branch, this node will start a new branch.
+ // Instead of doing that, we store the key somewhere else and create
+ // only one node with a pointer to the key
+ p = freenode++;
+ eq[p] = val; // holds data
+ length++;
+ hi[p] = (char)0;
+ if (len > 0)
+ {
+ sc[p] = (char)0xFFFF; // indicates branch is compressed
+ lo[p] = (char)kv.Alloc(len + 1); // use 'lo' to hold pointer to key
+ StrCpy(kv.Array, lo[p], key, start);
+ }
+ else
+ {
+ sc[p] = (char)0;
+ lo[p] = (char)0;
+ }
+ return p;
+ }
+
+ if (sc[p] == 0xFFFF)
+ {
+ // branch is compressed: need to decompress
+ // this will generate garbage in the external key array
+ // but we can do some garbage collection later
+ char pp = freenode++;
+ lo[pp] = lo[p]; // previous pointer to key
+ eq[pp] = eq[p]; // previous pointer to data
+ lo[p] = (char)0;
+ if (len > 0)
+ {
+ sc[p] = kv[lo[pp]];
+ eq[p] = pp;
+ lo[pp]++;
+ if (kv[lo[pp]] == 0)
+ {
+ // key completely decompressed leaving garbage in key array
+ lo[pp] = (char)0;
+ sc[pp] = (char)0;
+ hi[pp] = (char)0;
+ }
+ else
+ {
+ // we only got first char of key, rest is still there
+ sc[pp] = (char)0xFFFF;
+ }
+ }
+ else
+ {
+ // In this case we can save a node by swapping the new node
+ // with the compressed node
+ sc[pp] = (char)0xFFFF;
+ hi[p] = pp;
+ sc[p] = (char)0;
+ eq[p] = val;
+ length++;
+ return p;
+ }
+ }
+ char s = key[start];
+ if (s < sc[p])
+ {
+ lo[p] = Insert(lo[p], key, start, val);
+ }
+ else if (s == sc[p])
+ {
+ if (s != 0)
+ {
+ eq[p] = Insert(eq[p], key, start + 1, val);
+ }
+ else
+ {
+ // key already in tree, overwrite data
+ eq[p] = val;
+ }
+ }
+ else
+ {
+ hi[p] = Insert(hi[p], key, start, val);
+ }
+ return p;
+ }
+
+ /// <summary>
+ /// Compares two null-terminated char arrays
+ /// </summary>
+ public static int StrCmp(char[] a, int startA, char[] b, int startB)
+ {
+ for (; a[startA] == b[startB]; startA++, startB++)
+ {
+ if (a[startA] == 0)
+ {
+ return 0;
+ }
+ }
+ return a[startA] - b[startB];
+ }
+
+ /// <summary>
+ /// Compares a string with a null-terminated char array
+ /// </summary>
+ public static int StrCmp(string str, char[] a, int start)
+ {
+ int i, d, len = str.Length;
+ for (i = 0; i < len; i++)
+ {
+ d = (int)str[i] - a[start + i];
+ if (d != 0)
+ {
+ return d;
+ }
+ if (a[start + i] == 0)
+ {
+ return d;
+ }
+ }
+ if (a[start + i] != 0)
+ {
+ return -a[start + i];
+ }
+ return 0;
+
+ }
+
+ public static void StrCpy(char[] dst, int di, char[] src, int si)
+ {
+ while (src[si] != 0)
+ {
+ dst[di++] = src[si++];
+ }
+ dst[di] = (char)0;
+ }
+
+ public static int StrLen(char[] a, int start)
+ {
+ int len = 0;
+ for (int i = start; i < a.Length && a[i] != 0; i++)
+ {
+ len++;
+ }
+ return len;
+ }
+
+ public static int StrLen(char[] a)
+ {
+ return StrLen(a, 0);
+ }
+
+ public virtual int Find(string key)
+ {
+ int len = key.Length;
+ char[] strkey = new char[len + 1];
+ key.CopyTo(0, strkey, 0, len - 0);
+ strkey[len] = (char)0;
+
+ return Find(strkey, 0);
+ }
+
+ public virtual int Find(char[] key, int start)
+ {
+ int d;
+ char p = root;
+ int i = start;
+ char c;
+
+ while (p != 0)
+ {
+ if (sc[p] == 0xFFFF)
+ {
+ if (StrCmp(key, i, kv.Array, lo[p]) == 0)
+ {
+ return eq[p];
+ }
+ else
+ {
+ return -1;
+ }
+ }
+ c = key[i];
+ d = c - sc[p];
+ if (d == 0)
+ {
+ if (c == 0)
+ {
+ return eq[p];
+ }
+ i++;
+ p = eq[p];
+ }
+ else if (d < 0)
+ {
+ p = lo[p];
+ }
+ else
+ {
+ p = hi[p];
+ }
+ }
+ return -1;
+ }
+
+ public virtual bool Knows(string key)
+ {
+ return (Find(key) >= 0);
+ }
+
+ // redimension the arrays
+ private void RedimNodeArrays(int newsize)
+ {
+ int len = newsize < lo.Length ? newsize : lo.Length;
+ char[] na = new char[newsize];
+ Array.Copy(lo, 0, na, 0, len);
+ lo = na;
+ na = new char[newsize];
+ Array.Copy(hi, 0, na, 0, len);
+ hi = na;
+ na = new char[newsize];
+ Array.Copy(eq, 0, na, 0, len);
+ eq = na;
+ na = new char[newsize];
+ Array.Copy(sc, 0, na, 0, len);
+ sc = na;
+ }
+
+ public virtual int Length
+ {
+ get { return length; }
+ }
+
+ public object Clone()
+ {
+ TernaryTree t = new TernaryTree();
+ t.lo = (char[])this.lo.Clone();
+ t.hi = (char[])this.hi.Clone();
+ t.eq = (char[])this.eq.Clone();
+ t.sc = (char[])this.sc.Clone();
+ t.kv = (CharVector)this.kv.Clone();
+ t.root = this.root;
+ t.freenode = this.freenode;
+ t.length = this.length;
+
+ return t;
+ }
+
+ /// <summary>
+ /// Recursively insert the median first and then the median of the lower and
+ /// upper halves, and so on in order to get a balanced tree. The array of keys
+ /// is assumed to be sorted in ascending order.
+ /// </summary>
+ protected internal virtual void InsertBalanced(string[] k, char[] v, int offset, int n)
+ {
+ int m;
+ if (n < 1)
+ {
+ return;
+ }
+ m = n >> 1;
+
+ Insert(k[m + offset], v[m + offset]);
+ InsertBalanced(k, v, offset, m);
+
+ InsertBalanced(k, v, offset + m + 1, n - m - 1);
+ }
+
+ /// <summary>
+ /// Balance the tree for best search performance
+ /// </summary>
+ public virtual void Balance()
+ {
+ // System.out.print("Before root splitchar = ");
+ // System.out.println(sc[root]);
+
+ int i = 0, n = length;
+ string[] k = new string[n];
+ char[] v = new char[n];
+ Iterator iter = new Iterator(this);
+ while (iter.HasMoreElements())
+ {
+ v[i] = iter.Value;
+ iter.MoveNext();
+ k[i++] = iter.Current;
+ }
+ Init();
+ InsertBalanced(k, v, 0, n);
+
+ // With uniform letter distribution sc[root] should be around 'm'
+ // System.out.print("After root splitchar = ");
+ // System.out.println(sc[root]);
+ }
+
+ /// <summary>
+ /// Each node stores a character (splitchar) which is part of some key(s). In a
+ /// compressed branch (one that only contains a single string key) the trailer
+ /// of the key which is not already in nodes is stored externally in the kv
+ /// array. As items are inserted, key substrings decrease. Some substrings may
+ /// completely disappear when the whole branch is totally decompressed. The
+ /// tree is traversed to find the key substrings actually used. In addition,
+ /// duplicate substrings are removed using a map (implemented with a
+ /// TernaryTree!).
+ ///
+ /// </summary>
+ public virtual void TrimToSize()
+ {
+ // first balance the tree for best performance
+ Balance();
+
+ // redimension the node arrays
+ RedimNodeArrays(freenode);
+
+ // ok, compact kv array
+ CharVector kx = new CharVector();
+ kx.Alloc(1);
+ TernaryTree map = new TernaryTree();
+ Compact(kx, map, root);
+ kv = kx;
+ kv.TrimToSize();
+ }
+
+ /// <summary>
+ /// Recursive helper of <see cref="TrimToSize()"/>: visits the subtree rooted at
+ /// <paramref name="p"/> and re-points every compressed-branch node (splitchar
+ /// 0xFFFF) at a copy of its key remainder inside <paramref name="kx"/>.
+ /// </summary>
+ /// <param name="kx">vector that receives the key remainders still in use</param>
+ /// <param name="map">tree used to look up remainders that were already copied,
+ /// so that equal remainders share one offset</param>
+ /// <param name="p">index of the node to process; 0 ends the recursion</param>
+ private void Compact(CharVector kx, TernaryTree map, char p)
+ {
+     if (p == 0)
+     {
+         return;
+     }
+
+     if (sc[p] != 0xFFFF)
+     {
+         // regular node: walk lo, then eq (skipped when the splitchar is 0), then hi
+         Compact(kx, map, lo[p]);
+         if (sc[p] != 0)
+         {
+             Compact(kx, map, eq[p]);
+         }
+         Compact(kx, map, hi[p]);
+         return;
+     }
+
+     // compressed branch: lo[p] is the offset of the key remainder in kv
+     int offset = map.Find(kv.Array, lo[p]);
+     if (offset < 0)
+     {
+         // a negative lookup result is treated as "not copied yet":
+         // allocate room (length + 1), copy the remainder and register it
+         offset = kx.Alloc(StrLen(kv.Array, lo[p]) + 1);
+         StrCpy(kx.Array, offset, kv.Array, lo[p]);
+         map.Insert(kx.Array, offset, (char)offset);
+     }
+     lo[p] = (char)offset;
+ }
+
+ /// <summary>
+ /// Creates an enumerator over the keys of this tree.
+ /// </summary>
+ /// <returns>a new <see cref="Iterator"/> bound to this instance</returns>
+ public virtual IEnumerator<string> Keys()
+ {
+     IEnumerator<string> keys = new Iterator(this);
+     return keys;
+ }
+
+ /// <summary>
+ /// Enumerates the keys stored in the enclosing <see cref="TernaryTree"/>.
+ /// <para/>
+ /// NOTE: the constructor (through <see cref="Rewind()"/>) already positions the
+ /// iterator on the first key, so <see cref="Current"/> and <see cref="Value"/>
+ /// can be read before <see cref="MoveNext()"/> is called, and
+ /// <see cref="MoveNext()"/> always advances to the following key. This differs
+ /// from the usual enumerator convention of starting before the first element;
+ /// loop with <see cref="HasMoreElements()"/>, read, then advance.
+ /// </summary>
+ public class Iterator : IEnumerator<string>
+ {
+     private readonly TernaryTree outerInstance;
+
+     /// <summary>
+     /// current node index; -1 once the traversal is exhausted
+     /// </summary>
+     private int cur;
+
+     /// <summary>
+     /// current key
+     /// </summary>
+     private string curkey;
+
+     /// <summary>
+     /// Entry of the node stack: a node index together with a counter telling
+     /// which of its branches has been taken so far.
+     /// </summary>
+     internal class Item : ICloneable
+     {
+         internal char parent;
+         internal char child;
+
+         public Item()
+         {
+             parent = (char)0;
+             child = (char)0;
+         }
+
+         public Item(char p, char c)
+         {
+             parent = p;
+             child = c;
+         }
+
+         public object Clone()
+         {
+             return new Item(parent, child);
+         }
+     }
+
+     /// <summary>
+     /// Node stack
+     /// </summary>
+     internal Stack<Item> ns;
+
+     /// <summary>
+     /// key stack implemented with a StringBuilder
+     /// </summary>
+     internal StringBuilder ks;
+
+     /// <summary>
+     /// Creates an iterator over <paramref name="outerInstance"/> and rewinds it,
+     /// which positions it on the first key (if any).
+     /// </summary>
+     public Iterator(TernaryTree outerInstance)
+     {
+         this.outerInstance = outerInstance;
+         cur = -1;
+         ns = new Stack<Item>();
+         ks = new StringBuilder();
+         Rewind();
+     }
+
+     /// <summary>
+     /// Clears both stacks, restarts at the root of the tree and runs to the
+     /// first key.
+     /// </summary>
+     public virtual void Rewind()
+     {
+         ns.Clear();
+         ks.Length = 0;
+         cur = outerInstance.root;
+         Run();
+     }
+
+     //public override string NextElement()
+     //{
+     //    string res = curkey;
+     //    cur = up();
+     //    run();
+     //    return res;
+     //}
+
+     /// <summary>
+     /// The <c>eq</c> entry of the current node, or (char)0 when the iterator
+     /// is exhausted (<c>cur</c> is negative).
+     /// </summary>
+     public virtual char Value
+     {
+         get
+         {
+             if (cur >= 0)
+             {
+                 return outerInstance.eq[cur];
+             }
+             return (char)0;
+         }
+     }
+
+     /// <summary>
+     /// Returns true while the iterator is positioned on a key.
+     /// </summary>
+     public bool HasMoreElements()
+     {
+         return (cur != -1);
+     }
+
+     /// <summary>
+     /// traverse upwards
+     /// </summary>
+     /// <returns>index of the next node to descend into, or -1 when the node
+     /// stack is empty and the traversal is finished</returns>
+     internal virtual int Up()
+     {
+         int res = 0;
+
+         if (ns.Count == 0)
+         {
+             return -1;
+         }
+
+         if (cur != 0 && outerInstance.sc[cur] == 0)
+         {
+             return outerInstance.lo[cur];
+         }
+
+         bool climb = true;
+
+         while (climb)
+         {
+             // take the top entry and move on to its next branch; the counter
+             // starts at 0 when the node is pushed in Run() just before its lo
+             // branch is followed
+             Item i = ns.Pop();
+             i.child++;
+             switch ((int)i.child)
+             {
+                 case 1:
+                     if (outerInstance.sc[i.parent] != 0)
+                     {
+                         // follow the eq branch; the splitchar becomes part of the key
+                         res = outerInstance.eq[i.parent];
+                         ns.Push((Item)i.Clone());
+                         ks.Append(outerInstance.sc[i.parent]);
+                     }
+                     else
+                     {
+                         // splitchar 0: skip the eq branch and go straight to hi
+                         i.child++;
+                         ns.Push((Item)i.Clone());
+                         res = outerInstance.hi[i.parent];
+                     }
+                     climb = false;
+                     break;
+
+                 case 2:
+                     // follow the hi branch; drop the character appended for eq
+                     res = outerInstance.hi[i.parent];
+                     ns.Push((Item)i.Clone());
+                     if (ks.Length > 0)
+                     {
+                         ks.Length = ks.Length - 1; // pop
+                     }
+                     climb = false;
+                     break;
+
+                 default:
+                     // all branches of this node are done: keep climbing
+                     if (ns.Count == 0)
+                     {
+                         return -1;
+                     }
+                     climb = true;
+                     break;
+             }
+         }
+         return res;
+     }
+
+     /// <summary>
+     /// traverse the tree to find next key
+     /// </summary>
+     /// <returns>0 when a key was found and stored as the current key, -1 when
+     /// the traversal is exhausted</returns>
+     internal virtual int Run()
+     {
+         if (cur == -1)
+         {
+             return -1;
+         }
+
+         bool leaf = false;
+         while (true)
+         {
+             // first go down on low branch until leaf or compressed branch
+             while (cur != 0)
+             {
+                 if (outerInstance.sc[cur] == 0xFFFF)
+                 {
+                     leaf = true;
+                     break;
+                 }
+                 ns.Push(new Item((char)cur, '\u0000'));
+                 if (outerInstance.sc[cur] == 0)
+                 {
+                     leaf = true;
+                     break;
+                 }
+                 cur = outerInstance.lo[cur];
+             }
+             if (leaf)
+             {
+                 break;
+             }
+             // nothing found, go up one node and try again
+             cur = Up();
+             if (cur == -1)
+             {
+                 return -1;
+             }
+         }
+         // The current node should be a data node and
+         // the key should be in the key stack (at least partially)
+         StringBuilder buf = new StringBuilder(ks.ToString());
+         if (outerInstance.sc[cur] == 0xFFFF)
+         {
+             // compressed branch: append the zero-terminated remainder stored in kv
+             int p = outerInstance.lo[cur];
+             while (outerInstance.kv[p] != 0)
+             {
+                 buf.Append(outerInstance.kv[p++]);
+             }
+         }
+         curkey = buf.ToString();
+         return 0;
+     }
+
+     #region Added for better .NET support
+     /// <summary>
+     /// The key the iterator is currently positioned on.
+     /// </summary>
+     public string Current
+     {
+         get
+         {
+             return curkey;
+         }
+     }
+
+     object IEnumerator.Current
+     {
+         get
+         {
+             return curkey;
+         }
+     }
+
+     public void Dispose()
+     {
+         // nothing to do
+     }
+
+     /// <summary>
+     /// Advances to the next key.
+     /// </summary>
+     /// <returns>true when the iterator is positioned on a key afterwards,
+     /// false when the traversal is exhausted</returns>
+     public bool MoveNext()
+     {
+         cur = Up();
+         Run();
+         return cur != -1;
+     }
+
+     /// <summary>
+     /// Restores the state the iterator had right after construction by
+     /// delegating to <see cref="Rewind()"/>.
+     /// </summary>
+     public void Reset()
+     {
+         Rewind();
+     }
+
+     #endregion
+ }
+
+ // LUCENENET: Not sure we really need this
+ // public virtual void printStats(PrintStream @out)
+ // {
+ //@out.println("Number of keys = " + Convert.ToString(length));
+ //@out.println("Node count = " + Convert.ToString(freenode));
+ //// System.out.println("Array length = " + Integer.toString(eq.length));
+ //@out.println("Key Array length = " + Convert.ToString(kv.length()));
+
+ ///*
+ // * for(int i=0; i<kv.length(); i++) if ( kv.get(i) != 0 )
+ // * System.out.print(kv.get(i)); else System.out.println("");
+ // * System.out.println("Keys:"); for(Enumeration enum = keys();
+ // * enum.hasMoreElements(); ) System.out.println(enum.nextElement());
+ // */
+
+ // }
+ /*
+ public static void main(String[] args) {
+ TernaryTree tt = new TernaryTree();
+ tt.insert("Carlos", 'C');
+ tt.insert("Car", 'r');
+ tt.insert("palos", 'l');
+ tt.insert("pa", 'p');
+ tt.trimToSize();
+ System.out.println((char) tt.find("Car"));
+ System.out.println((char) tt.find("Carlos"));
+ System.out.println((char) tt.find("alto"));
+ tt.printStats(System.out);
+ }
+ */
+
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/hyphenation.dtd
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/hyphenation.dtd b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/hyphenation.dtd
new file mode 100644
index 0000000..083c2bd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/hyphenation.dtd
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+ Copyright 1999-2004 The Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- $Id: hyphenation.dtd,v 1.3 2004/02/27 18:34:59 jeremias Exp $ -->
+
+<!ELEMENT hyphenation-info (hyphen-char?, hyphen-min?,
+ classes, exceptions?, patterns)>
+
+<!-- Hyphen character to be used in the exception list as shortcut for
+ <hyphen pre-break="-"/>. Defaults to '-'
+-->
+<!ELEMENT hyphen-char EMPTY>
+<!ATTLIST hyphen-char value CDATA #REQUIRED>
+
+<!-- Default minimum length in characters of hyphenated word fragments
+ before and after the line break. For some languages this is not
+ only for aesthetic purposes, wrong hyphens may be generated if this
+ is not accounted for.
+-->
+<!ELEMENT hyphen-min EMPTY>
+<!ATTLIST hyphen-min before CDATA #REQUIRED>
+<!ATTLIST hyphen-min after CDATA #REQUIRED>
+
+<!-- Character equivalent classes: space separated list of character groups, all
+ characters in a group are to be treated equivalent as far as
+ the hyphenation algorithm is concerned. The first character in a group
+ is the group's equivalent character. Patterns should only contain
+ first characters. It also defines word characters, i.e. a word that
+ contains characters not present in any of the classes is not hyphenated.
+-->
+<!ELEMENT classes (#PCDATA)>
+
+<!-- Hyphenation exceptions: space separated list of hyphenated words.
+ A hyphen is indicated by the hyphen tag, but you can use the
+ hyphen-char defined previously as shortcut. This is in cases
+ when the algorithm procedure finds wrong hyphens or you want
+ to provide your own hyphenation for some words.
+-->
+<!ELEMENT exceptions (#PCDATA|hyphen)* >
+
+<!-- The hyphenation patterns, space separated. A pattern is made of 'equivalent'
+ characters as described before, between any two word characters a digit
+ in the range 0 to 9 may be specified. The absence of a digit is equivalent
+ to zero. The '.' character is reserved to indicate beginning or ending
+ of words. -->
+<!ELEMENT patterns (#PCDATA)>
+
+<!-- A "full hyphen" equivalent to TeX's \discretionary
+ with pre-break, post-break and no-break attributes.
+ To be used in the exceptions list, the hyphen character is not
+ automatically added -->
+<!ELEMENT hyphen EMPTY>
+<!ATTLIST hyphen pre CDATA #IMPLIED>
+<!ATTLIST hyphen no CDATA #IMPLIED>
+<!ATTLIST hyphen post CDATA #IMPLIED>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
index a74ed0b..615d1a0 100644
--- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -42,6 +42,8 @@
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Xml" />
+ <Reference Include="System.Xml.Linq" />
</ItemGroup>
<ItemGroup>
<Compile Include="Analysis\Bg\BulgarianAnalyzer.cs" />
@@ -91,6 +93,19 @@
<Compile Include="Analysis\CommonGrams\CommonGramsFilterFactory.cs" />
<Compile Include="Analysis\CommonGrams\CommonGramsQueryFilter.cs" />
<Compile Include="Analysis\CommonGrams\CommonGramsQueryFilterFactory.cs" />
+ <Compile Include="Analysis\Compound\CompoundWordTokenFilterBase.cs" />
+ <Compile Include="Analysis\Compound\DictionaryCompoundWordTokenFilter.cs" />
+ <Compile Include="Analysis\Compound\DictionaryCompoundWordTokenFilterFactory.cs" />
+ <Compile Include="Analysis\Compound\HyphenationCompoundWordTokenFilter.cs" />
+ <Compile Include="Analysis\Compound\HyphenationCompoundWordTokenFilterFactory.cs" />
+ <Compile Include="Analysis\Compound\Hyphenation\ByteVector.cs" />
+ <Compile Include="Analysis\Compound\Hyphenation\CharVector.cs" />
+ <Compile Include="Analysis\Compound\Hyphenation\Hyphen.cs" />
+ <Compile Include="Analysis\Compound\Hyphenation\Hyphenation.cs" />
+ <Compile Include="Analysis\Compound\Hyphenation\HyphenationTree.cs" />
+ <Compile Include="Analysis\Compound\Hyphenation\PatternConsumer.cs" />
+ <Compile Include="Analysis\Compound\Hyphenation\PatternParser.cs" />
+ <Compile Include="Analysis\Compound\Hyphenation\TernaryTree.cs" />
<Compile Include="Analysis\Core\KeywordAnalyzer.cs" />
<Compile Include="Analysis\Core\KeywordTokenizer.cs" />
<Compile Include="Analysis\Core\KeywordTokenizerFactory.cs" />
@@ -481,6 +496,7 @@
<ItemGroup>
<EmbeddedResource Include="Analysis\Gl\galician.rslp" />
<EmbeddedResource Include="Analysis\Pt\portuguese.rslp" />
+ <EmbeddedResource Include="Analysis\Compound\Hyphenation\hyphenation.dtd" />
<None Include="packages.config" />
</ItemGroup>
<ItemGroup>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs b/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs
index bca2d65..76e9090 100644
--- a/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs
+++ b/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs
@@ -75,6 +75,13 @@ namespace Lucene.Net.Analysis.Tokenattributes
ICharTermAttribute SetEmpty();
// the following methods are redefined to get rid of IOException declaration:
+
+ /// <summary>
+ /// Appends the contents of the {@code ICharSequence} to this character sequence.
+ /// <para/>The characters of the {@code ICharSequence} argument are appended, in order, increasing the length of
+ /// this sequence by the length of the argument. If argument is {@code null}, then the four
+ /// characters {@code "null"} are appended.
+ /// </summary>
ICharTermAttribute Append(ICharSequence csq);
ICharTermAttribute Append(ICharSequence csq, int start, int end);
@@ -106,5 +113,7 @@ namespace Lucene.Net.Analysis.Tokenattributes
/// characters {@code "null"} are appended.
/// </summary>
ICharTermAttribute Append(ICharTermAttribute termAtt);
+
+ ICharSequence SubSequence(int start, int end);
}
}
\ No newline at end of file